early-access version 1255

2020-12-28 15:15:37 +00:00
parent 84b39492d1
commit 78b48028e1
6254 changed files with 1868140 additions and 0 deletions
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -0,0 +1,166 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::SubOp;
+
+// Decodes the floating-point ALU instruction at program_code[pc] (MOV, FMUL, FADD, MUFU, FMNMX,
+// FCMP or RRO), appends the generated IR to bb and returns pc unchanged.
+u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    // Operand A is always read from the register selected by gpr8.
+    Node op_a = GetRegister(instr.gpr8);
+
+    // Operand B is an immediate, the register selected by gpr20 or a constant buffer entry.
+    Node op_b = [&] {
+        if (instr.is_b_imm) {
+            return GetImmediate19(instr);
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        }
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::MOV_C:
+    case OpCode::Id::MOV_R: {
+        // MOV has neither 'abs' nor 'neg' bits.
+        SetRegister(bb, instr.gpr0, op_b);
+        break;
+    }
+    case OpCode::Id::FMUL_C:
+    case OpCode::Id::FMUL_R:
+    case OpCode::Id::FMUL_IMM: {
+        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
+        if (instr.fmul.tab5cb8_2 != 0) {
+            LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+                      instr.fmul.tab5cb8_2.Value());
+        }
+        if (instr.fmul.tab5c68_0 != 1) {
+            LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
+                      instr.fmul.tab5c68_0.Value());
+        }
+
+        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
+
+        static constexpr std::array FmulPostFactor = {
+            1.000f, // None
+            0.500f, // Divide 2
+            0.250f, // Divide 4
+            0.125f, // Divide 8
+            8.000f, // Mul 8
+            4.000f, // Mul 4
+            2.000f, // Mul 2
+        };
+
+        // Guard the table lookup: an encoding outside the known post factors must be reported
+        // instead of indexing past the end of FmulPostFactor.
+        const u64 postfactor = instr.fmul.postfactor;
+        if (postfactor >= FmulPostFactor.size()) {
+            UNIMPLEMENTED_MSG("Unhandled FMUL post factor={}", postfactor);
+        } else if (postfactor != 0) {
+            op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
+                             Immediate(FmulPostFactor[postfactor]));
+        }
+
+        // TODO(Rodrigo): Should precise be used when there's a postfactor?
+        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::FADD_C:
+    case OpCode::Id::FADD_R:
+    case OpCode::Id::FADD_IMM: {
+        // Both operands carry their own 'abs' and 'neg' modifiers.
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
+        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::MUFU: {
+        // MUFU applies the unary function selected by sub_op to operand A only.
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+
+        Node value = [&]() {
+            switch (instr.sub_op) {
+            case SubOp::Cos:
+                return Operation(OperationCode::FCos, PRECISE, op_a);
+            case SubOp::Sin:
+                return Operation(OperationCode::FSin, PRECISE, op_a);
+            case SubOp::Ex2:
+                return Operation(OperationCode::FExp2, PRECISE, op_a);
+            case SubOp::Lg2:
+                return Operation(OperationCode::FLog2, PRECISE, op_a);
+            case SubOp::Rcp:
+                // The reciprocal is emitted as the division 1.0 / op_a.
+                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
+            case SubOp::Rsq:
+                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
+            case SubOp::Sqrt:
+                return Operation(OperationCode::FSqrt, PRECISE, op_a);
+            default:
+                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
+                return Immediate(0);
+            }
+        }();
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::FMNMX_C:
+    case OpCode::Id::FMNMX_R:
+    case OpCode::Id::FMNMX_IMM: {
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
+        // Both the minimum and the maximum are built; the predicate picks one through Select.
+        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
+
+        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
+        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
+        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::FCMP_RR:
+    case OpCode::Id::FCMP_RC:
+    case OpCode::Id::FCMP_IMMR: {
+        UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
+        // Register gpr39 is compared against 0.0f; the outcome selects between op_a and op_b.
+        Node op_c = GetRegister(instr.gpr39);
+        Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
+        SetRegister(
+            bb, instr.gpr0,
+            Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
+        break;
+    }
+    case OpCode::Id::RRO_C:
+    case OpCode::Id::RRO_R:
+    case OpCode::Id::RRO_IMM: {
+        LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
+
+        // Currently RRO is only implemented as a register move.
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+        SetRegister(bb, instr.gpr0, op_b);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -0,0 +1,101 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::HalfType;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes the packed half-float instruction at program_code[pc] (HADD2 or HMUL2, in register or
+// constant buffer form), appends the generated IR to bb and returns pc unchanged.
+u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    const auto opcode_id = opcode->get().GetId();
+
+    // Tests a single modifier bit of the raw instruction word.
+    const auto has_bit = [&instr](u32 position) {
+        return ((instr.value >> position) & 1) != 0;
+    };
+
+    bool neg_a = false;
+    bool neg_b = false;
+    bool abs_a = false;
+    bool abs_b = false;
+
+    // The 'abs'/'neg' modifier bits sit at different positions for each encoding.
+    switch (opcode_id) {
+    case OpCode::Id::HADD2_R:
+        if (instr.alu_half.ftz == 0) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+        }
+        abs_a = has_bit(44);
+        neg_a = has_bit(43);
+        abs_b = has_bit(30);
+        neg_b = has_bit(31);
+        break;
+    case OpCode::Id::HADD2_C:
+        if (instr.alu_half.ftz == 0) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+        }
+        abs_a = has_bit(44);
+        neg_a = has_bit(43);
+        abs_b = has_bit(54);
+        neg_b = has_bit(56);
+        break;
+    case OpCode::Id::HMUL2_R:
+        abs_a = has_bit(44);
+        neg_a = has_bit(43);
+        abs_b = has_bit(30);
+        break;
+    case OpCode::Id::HMUL2_C:
+        abs_a = has_bit(44);
+        abs_b = has_bit(54);
+        neg_b = has_bit(31);
+        break;
+    default:
+        UNREACHABLE();
+        break;
+    }
+
+    // First operand: register gpr8, unpacked according to type_a.
+    const Node op_a = GetOperandAbsNegHalf(
+        UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a), abs_a, neg_a);
+
+    // Second operand: a constant buffer entry (always unpacked as F32) or register gpr20.
+    HalfType type_b = HalfType::F32;
+    Node raw_b;
+    switch (opcode_id) {
+    case OpCode::Id::HADD2_C:
+    case OpCode::Id::HMUL2_C:
+        raw_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        break;
+    case OpCode::Id::HADD2_R:
+    case OpCode::Id::HMUL2_R:
+        type_b = instr.alu_half.type_b;
+        raw_b = GetRegister(instr.gpr20);
+        break;
+    default:
+        UNREACHABLE();
+        raw_b = Immediate(0);
+        break;
+    }
+    const Node op_b = GetOperandAbsNegHalf(UnpackHalfFloat(raw_b, type_b), abs_b, neg_b);
+
+    Node result;
+    switch (opcode_id) {
+    case OpCode::Id::HADD2_C:
+    case OpCode::Id::HADD2_R:
+        result = Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
+        break;
+    case OpCode::Id::HMUL2_C:
+    case OpCode::Id::HMUL2_R:
+        result = Operation(OperationCode::HMul, PRECISE, op_a, op_b);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
+        result = Immediate(0);
+        break;
+    }
+
+    // Saturate, then merge with the current destination contents as the merge mode dictates.
+    result = GetSaturatedHalfFloat(result, instr.alu_half.saturate);
+    result = HalfMerge(GetRegister(instr.gpr0), result, instr.alu_half.merge);
+
+    SetRegister(bb, instr.gpr0, result);
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -0,0 +1,54 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes the half-float instruction with an immediate operand at program_code[pc] (HADD2_IMM or
+// HMUL2_IMM), appends the generated IR to bb and returns pc unchanged.
+u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    const auto opcode_id = opcode->get().GetId();
+
+    // HADD2_IMM reports FTZ through the 'ftz' bit, every other opcode through 'precision'.
+    const bool ftz_missing =
+        opcode_id == OpCode::Id::HADD2_IMM
+            ? instr.alu_half_imm.ftz == 0
+            : instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ;
+    if (ftz_missing) {
+        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+    }
+
+    // First operand: register gpr8 unpacked per type_a, with its 'abs'/'neg' modifiers applied.
+    const Node lhs = GetOperandAbsNegHalf(
+        UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a),
+        instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
+
+    // Second operand: the half-float immediate carried by the instruction.
+    const Node rhs = UnpackHalfImmediate(instr, true);
+
+    Node result;
+    switch (opcode_id) {
+    case OpCode::Id::HADD2_IMM:
+        result = Operation(OperationCode::HAdd, PRECISE, lhs, rhs);
+        break;
+    case OpCode::Id::HMUL2_IMM:
+        result = Operation(OperationCode::HMul, PRECISE, lhs, rhs);
+        break;
+    default:
+        UNREACHABLE();
+        result = Immediate(0);
+        break;
+    }
+
+    result = GetSaturatedHalfFloat(result, instr.alu_half_imm.saturate);
+    result = HalfMerge(GetRegister(instr.gpr0), result, instr.alu_half_imm.merge);
+    SetRegister(bb, instr.gpr0, result);
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -0,0 +1,53 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes the arithmetic instruction with a 32-bit immediate at program_code[pc] (MOV32_IMM,
+// FMUL32_IMM or FADD32I), appends the generated IR to bb and returns pc unchanged.
+u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    const auto opcode_id = opcode->get().GetId();
+
+    if (opcode_id == OpCode::Id::MOV32_IMM) {
+        // Plain move of the immediate into the destination register.
+        SetRegister(bb, instr.gpr0, GetImmediate32(instr));
+    } else if (opcode_id == OpCode::Id::FMUL32_IMM) {
+        const Node product =
+            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
+        const Node result = GetSaturatedFloat(product, instr.fmul32.saturate);
+
+        SetInternalFlagsFromFloat(bb, result, instr.op_32.generates_cc);
+        SetRegister(bb, instr.gpr0, result);
+    } else if (opcode_id == OpCode::Id::FADD32I) {
+        // Each operand has its own 'abs' and 'neg' modifier bits.
+        Node lhs = GetRegister(instr.gpr8);
+        lhs = GetOperandAbsNegFloat(lhs, instr.fadd32i.abs_a, instr.fadd32i.negate_a);
+        Node rhs = GetImmediate32(instr);
+        rhs = GetOperandAbsNegFloat(rhs, instr.fadd32i.abs_b, instr.fadd32i.negate_b);
+
+        const Node sum = Operation(OperationCode::FAdd, PRECISE, lhs, rhs);
+        SetInternalFlagsFromFloat(bb, sum, instr.op_32.generates_cc);
+        SetRegister(bb, instr.gpr0, sum);
+    } else {
+        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
+                          opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -0,0 +1,375 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::IAdd3Height;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::Register;
+// Decodes the integer ALU instruction at program_code[pc] into IR appended to bb; returns pc.
+u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // op_a is register gpr8; op_b is an immediate, register gpr20 or a constant buffer entry.
+    Node op_a = GetRegister(instr.gpr8);
+    Node op_b = [&]() {
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        }
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::IADD_C:
+    case OpCode::Id::IADD_R:
+    case OpCode::Id::IADD_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
+        UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
+        // Apply the per-operand negate bits before the addition.
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
+
+        Node value = Operation(OperationCode::UAdd, op_a, op_b);
+        // IADD.X adds an extra 1 when the internal carry flag is set.
+        if (instr.iadd.x) {
+            Node carry = GetInternalFlag(InternalFlag::Carry);
+            Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
+            value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
+        }
+        // When condition codes are requested, derive the zero/sign/carry/overflow flags.
+        if (instr.generates_cc) {
+            const Node i0 = Immediate(0);
+
+            Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
+            Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
+            Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
+            // NOTE(review): overflow is only raised for positive + positive -> negative.
+            Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
+            Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
+            Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
+            Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
+
+            SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
+            SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
+            SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
+            SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
+        }
+        SetRegister(bb, instr.gpr0, std::move(value));
+        break;
+    }
+    case OpCode::Id::IADD3_C:
+    case OpCode::Id::IADD3_R:
+    case OpCode::Id::IADD3_IMM: {
+        Node op_c = GetRegister(instr.gpr39);
+        // Keeps the value or extracts a 16-bit field (presumably offset 0 / 16) per height mode.
+        const auto ApplyHeight = [&](IAdd3Height height, Node value) {
+            switch (height) {
+            case IAdd3Height::None:
+                return value;
+            case IAdd3Height::LowerHalfWord:
+                return BitfieldExtract(value, 0, 16);
+            case IAdd3Height::UpperHalfWord:
+                return BitfieldExtract(value, 16, 16);
+            default:
+                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
+                return Immediate(0);
+            }
+        };
+        // Height modifiers are only honoured by the register form (IADD3_R).
+        if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
+            op_a = ApplyHeight(instr.iadd3.height_a, op_a);
+            op_b = ApplyHeight(instr.iadd3.height_b, op_b);
+            op_c = ApplyHeight(instr.iadd3.height_c, op_c);
+        }
+
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
+        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
+        // Only IADD3_R may shift the a + b partial sum by 16 bits before op_c is added.
+        const Node value = [&] {
+            Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
+            if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
+                return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
+            }
+            const Node shifted = [&] {
+                switch (instr.iadd3.mode) {
+                case Tegra::Shader::IAdd3Mode::RightShift:
+                    // TODO(tech4me): According to
+                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
+                    // The addition between op_a and op_b should be done in uint33, more
+                    // investigation required
+                    return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
+                                     Immediate(16));
+                case Tegra::Shader::IAdd3Mode::LeftShift:
+                    return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
+                                     Immediate(16));
+                default:
+                    return add_ab;
+                }
+            }();
+            return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
+        }();
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::ISCADD_C:
+    case OpCode::Id::ISCADD_R:
+    case OpCode::Id::ISCADD_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in ISCADD is not implemented");
+        // ISCADD yields (op_a << shift_amount) + op_b, after the negate bits are applied.
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
+
+        const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
+        const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
+        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::POPC_C:
+    case OpCode::Id::POPC_R:
+    case OpCode::Id::POPC_IMM: {
+        if (instr.popc.invert) {
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+        }
+        const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::FLO_R:
+    case OpCode::Id::FLO_C:
+    case OpCode::Id::FLO_IMM: {
+        Node value;
+        if (instr.flo.invert) {
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+        }
+        if (instr.flo.is_signed) {
+            value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
+        } else {
+            value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
+        }
+        if (instr.flo.sh) {
+            value =
+                Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
+        }
+        SetRegister(bb, instr.gpr0, std::move(value));
+        break;
+    }
+    case OpCode::Id::SEL_C:
+    case OpCode::Id::SEL_R:
+    case OpCode::Id::SEL_IMM: {
+        const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
+        const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::ICMP_CR:
+    case OpCode::Id::ICMP_R:
+    case OpCode::Id::ICMP_RC:
+    case OpCode::Id::ICMP_IMM: {
+        const Node zero = Immediate(0);
+        // op_rhs is the alternative operand given to Select; test is compared against zero.
+        const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::ICMP_CR:
+                return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+                        GetRegister(instr.gpr39)};
+            case OpCode::Id::ICMP_R:
+                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+            case OpCode::Id::ICMP_RC:
+                return {GetRegister(instr.gpr39),
+                        GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+            case OpCode::Id::ICMP_IMM:
+                return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
+            default:
+                UNREACHABLE();
+                return {zero, zero};
+            }
+        }();
+        const Node op_lhs = GetRegister(instr.gpr8);
+        const Node comparison =
+            GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
+        SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
+        break;
+    }
+    case OpCode::Id::LOP_C:
+    case OpCode::Id::LOP_R:
+    case OpCode::Id::LOP_IMM: {
+        if (instr.alu.lop.invert_a)
+            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+        if (instr.alu.lop.invert_b)
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+
+        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
+                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
+                            instr.generates_cc);
+        break;
+    }
+    case OpCode::Id::LOP3_C:
+    case OpCode::Id::LOP3_R:
+    case OpCode::Id::LOP3_IMM: {
+        const Node op_c = GetRegister(instr.gpr39);
+        const Node lut = [&]() {
+            if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
+                return Immediate(instr.alu.lop3.GetImmLut28());
+            } else {
+                return Immediate(instr.alu.lop3.GetImmLut48());
+            }
+        }();
+
+        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
+        break;
+    }
+    case OpCode::Id::IMNMX_C:
+    case OpCode::Id::IMNMX_R:
+    case OpCode::Id::IMNMX_IMM: {
+        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
+
+        const bool is_signed = instr.imnmx.is_signed;
+        // The predicate chooses between the minimum and the maximum of both operands.
+        const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
+        const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
+        const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
+        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::LEA_R2:
+    case OpCode::Id::LEA_R1:
+    case OpCode::Id::LEA_IMM:
+    case OpCode::Id::LEA_RZ:
+    case OpCode::Id::LEA_HI: {
+        auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::LEA_R2: {
+                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
+                        Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
+            }
+            case OpCode::Id::LEA_R1: {
+                const bool neg = instr.lea.r1.neg != 0;
+                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        GetRegister(instr.gpr20),
+                        Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
+            }
+            case OpCode::Id::LEA_IMM: {
+                const bool neg = instr.lea.imm.neg != 0;
+                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
+                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
+            }
+            case OpCode::Id::LEA_RZ: {
+                const bool neg = instr.lea.rz.neg != 0;
+                return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
+                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
+            }
+            case OpCode::Id::LEA_HI:
+            default:
+                UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
+                // Placeholder operands returned after reporting the unhandled form.
+                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
+                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
+            }
+        }();
+
+        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
+                             "Unhandled LEA Predicate");
+        // The result is op_b_ + (op_a_ << op_c_).
+        Node value =
+            Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
+        value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
+        SetRegister(bb, instr.gpr0, std::move(value));
+
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+// Emits the IR for a three-input logic operation. imm_lut must hold an ImmediateNode whose low
+// eight bits form a truth table: every set bit contributes one AND-term of op_a, op_b and op_c
+// (each either as-is or complemented) and all enabled terms are OR-ed together. The result is
+// written to dest and the internal flags are updated from it according to sets_cc.
+void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
+                                    Node imm_lut, bool sets_cc) {
+    const ImmediateNode table = std::get<ImmediateNode>(*imm_lut);
+
+    // Yields the operand itself when it is used as-is, its bitwise complement otherwise.
+    const auto literal = [&](const Node& operand, bool as_is) {
+        return as_is ? operand : Operation(OperationCode::IBitwiseNot, operand);
+    };
+
+    Node result = Immediate(0);
+    for (u32 row = 0; row < 8; ++row) {
+        if ((table.GetValue() & (1U << row)) == 0) {
+            continue;
+        }
+        // Bits 2, 1 and 0 of the row index tell whether a, b and c enter this term uninverted.
+        const Node a = literal(op_a, (row & 4) != 0);
+        const Node b = literal(op_b, (row & 2) != 0);
+        const Node c = literal(op_c, (row & 1) != 0);
+
+        Node term = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
+        term = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, term, c);
+        result = Operation(OperationCode::IBitwiseOr, result, term);
+    }
+
+    SetInternalFlagsFromInteger(bb, result, sets_cc);
+    SetRegister(bb, dest, result);
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -0,0 +1,99 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::LogicOperation;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::PredicateResultMode;
+using Tegra::Shader::Register;
+
+// Decodes the integer ALU instructions whose second operand is an immediate (IADD32I and
+// LOP32I) and appends the generated IR to bb. Returns pc unchanged.
+u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    const bool sets_cc = instr.op_32.generates_cc != 0;
+
+    // First operand is read from gpr8, the second one from the imm20_32 field.
+    Node lhs = GetRegister(instr.gpr8);
+    Node rhs = Immediate(static_cast<s32>(instr.alu.imm20_32));
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::IADD32I: {
+        UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
+
+        // Only negation of the register operand is applied here; 'abs' is never requested.
+        const bool negate_lhs = instr.iadd32i.negate_a != 0;
+        lhs = GetOperandAbsNegInteger(std::move(lhs), false, negate_lhs, true);
+
+        Node sum = Operation(OperationCode::IAdd, PRECISE, std::move(lhs), std::move(rhs));
+        SetInternalFlagsFromInteger(bb, sum, sets_cc);
+        SetRegister(bb, instr.gpr0, std::move(sum));
+        break;
+    }
+    case OpCode::Id::LOP32I: {
+        // Each operand can be bitwise-inverted independently before the logic operation.
+        if (instr.alu.lop32i.invert_a) {
+            lhs = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(lhs));
+        }
+        if (instr.alu.lop32i.invert_b) {
+            rhs = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(rhs));
+        }
+
+        // LOP32I never writes a predicate, so the predicate mode is None.
+        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(lhs),
+                            std::move(rhs), PredicateResultMode::None, Pred::UnusedIndex, sets_cc);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
+                          opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+// Emits a two-operand bitwise logic operation into bb.
+// The result is written to dest, the internal flags are updated when sets_cc is true and,
+// depending on predicate_mode, the given predicate is written as well.
+void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
+                                   Node op_b, PredicateResultMode predicate_mode, Pred predicate,
+                                   bool sets_cc) {
+    Node result;
+    switch (logic_op) {
+    case LogicOperation::And:
+        result = Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
+        break;
+    case LogicOperation::Or:
+        result = Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
+        break;
+    case LogicOperation::Xor:
+        result = Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
+        break;
+    case LogicOperation::PassB:
+        // The second operand is forwarded untouched.
+        result = op_b;
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
+        result = Immediate(0);
+        break;
+    }
+
+    SetInternalFlagsFromInteger(bb, result, sets_cc);
+    SetRegister(bb, dest, result);
+
+    // Write the predicate value depending on the predicate mode.
+    if (predicate_mode == PredicateResultMode::None) {
+        // Nothing else to write.
+        return;
+    }
+    if (predicate_mode == PredicateResultMode::NotZero) {
+        // The predicate becomes true when the result is different from zero.
+        Node is_not_zero =
+            Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
+        SetPredicate(bb, static_cast<u64>(predicate), std::move(is_not_zero));
+        return;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -0,0 +1,77 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes the BFE (bit field extract) instruction family and appends the generated IR to bb.
+// The second operand packs the extraction offset in bits [0, 8) and the bit count in bits [8, 16).
+// Returns pc unchanged.
+u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    Node op_a = GetRegister(instr.gpr8);
+
+    // The packed offset/count operand comes from a register, a const buffer or an immediate.
+    Node op_b;
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::BFE_R:
+        op_b = GetRegister(instr.gpr20);
+        break;
+    case OpCode::Id::BFE_C:
+        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        break;
+    case OpCode::Id::BFE_IMM:
+        op_b = Immediate(instr.alu.GetSignedImm20_20());
+        break;
+    default:
+        UNREACHABLE();
+        op_b = Immediate(0);
+        break;
+    }
+
+    UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
+
+    const bool is_signed = instr.bfe.is_signed;
+
+    // Bit reversal of the source, done with the parallel swap method described in
+    // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+    // A faster method could be implemented later if one is available.
+    if (instr.bfe.brev) {
+        // Swaps adjacent groups of 'shift' bits of op_a. A mask of zero means no masking is
+        // needed (used for the final 16-bit swap).
+        const auto swap_groups = [&](u32 shift, u32 mask) {
+            const bool has_mask = mask != 0;
+
+            Node high = SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a,
+                                        Immediate(shift));
+            if (has_mask) {
+                high = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(high),
+                                       Immediate(mask));
+            }
+
+            Node low = op_a;
+            if (has_mask) {
+                low = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(low),
+                                      Immediate(mask));
+            }
+            low = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(low),
+                                  Immediate(shift));
+
+            return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(high),
+                                   std::move(low));
+        };
+        op_a = swap_groups(1, 0x55555555U);
+        op_a = swap_groups(2, 0x33333333U);
+        op_a = swap_groups(4, 0x0F0F0F0FU);
+        op_a = swap_groups(8, 0x00FF00FFU);
+        op_a = swap_groups(16, 0);
+    }
+
+    // Unpack the offset and bit count from op_b, then extract that field from op_a.
+    const auto field_offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+                                              Immediate(0), Immediate(8));
+    const auto field_bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+                                            Immediate(8), Immediate(8));
+    auto extracted = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a,
+                                     field_offset, field_bits);
+    SetRegister(bb, instr.gpr0, std::move(extracted));
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -0,0 +1,45 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes the BFI (bit field insert) instruction family and appends the generated IR to bb.
+// The packed shift operand holds the insertion offset in bits [0, 8) and the bit count in
+// bits [8, 16). Returns pc unchanged.
+u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    // Which operand is the packed shift and which is the base depends on the encoding.
+    Node packed_shift;
+    Node base;
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::BFI_RC:
+        packed_shift = GetRegister(instr.gpr39);
+        base = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        break;
+    case OpCode::Id::BFI_IMM_R:
+        packed_shift = Immediate(instr.alu.GetSignedImm20_20());
+        base = GetRegister(instr.gpr39);
+        break;
+    default:
+        UNREACHABLE();
+        packed_shift = Immediate(0);
+        base = Immediate(0);
+        break;
+    }
+
+    const Node insert = GetRegister(instr.gpr8);
+    const Node offset = BitfieldExtract(packed_shift, 0, 8);
+    const Node bits = BitfieldExtract(packed_shift, 8, 8);
+
+    const Node inserted =
+        Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
+
+    SetInternalFlagsFromInteger(bb, inserted, instr.generates_cc);
+    SetRegister(bb, instr.gpr0, inserted);
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -0,0 +1,321 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <limits>
+#include <optional>
+#include <utility>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+
+namespace {
+
+// Maps a half-float source selector to the cast operation to use: selector 0 yields
+// FCastHalf0, any other value yields FCastHalf1.
+constexpr OperationCode GetFloatSelector(u64 selector) {
+    if (selector == 0) {
+        return OperationCode::FCastHalf0;
+    }
+    return OperationCode::FCastHalf1;
+}
+
+// Returns the width in bits of the given register size, or 0 when the size is not one of the
+// enumerators handled below.
+constexpr u32 SizeInBits(Register::Size size) {
+    u32 bits = 0;
+    switch (size) {
+    case Register::Size::Byte:
+        bits = 8;
+        break;
+    case Register::Size::Short:
+        bits = 16;
+        break;
+    case Register::Size::Word:
+        bits = 32;
+        break;
+    case Register::Size::Long:
+        bits = 64;
+        break;
+    }
+    return bits;
+}
+
+// Computes the [min, max] range an integer has to be clamped to when it is converted from
+// src_size to dst_size with saturation enabled.
+//
+// Returns std::nullopt when no clamping is required (word to word conversion keeping the
+// signedness). The bounds are expressed as s32 values; the unsigned 32-bit maximum is returned
+// as its s32 bit pattern.
+constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
+                                                                   Register::Size dst_size,
+                                                                   bool src_signed,
+                                                                   bool dst_signed) {
+    const u32 dst_bits = SizeInBits(dst_size);
+    if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
+        if (src_signed == dst_signed) {
+            return std::nullopt;
+        }
+        // Changing signedness of a full word: only the non-negative signed range is shared
+        return std::make_pair(0, std::numeric_limits<s32>::max());
+    }
+    if (dst_signed) {
+        if (dst_bits >= 32) {
+            // Avoid overflowing int: 1 << 31 cannot be negated nor decremented, and wider
+            // shifts are out of range. The destination covers the whole s32 range here.
+            return std::make_pair(std::numeric_limits<s32>::min(),
+                                  std::numeric_limits<s32>::max());
+        }
+        // Signed destination, clamp to [-128, 127] for instance
+        return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
+    }
+    // Unsigned destination
+    if (dst_bits >= 32) {
+        // Avoid shifting by 32 or more, that is undefined behavior
+        return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
+    }
+    return std::make_pair(0, (1 << dst_bits) - 1);
+}
+
+} // Anonymous namespace
+
+// Decodes the conversion instruction families (I2I, I2F, F2F and F2I) and appends the generated
+// IR to bb. For every family the source operand is taken from gpr20, a const buffer or an
+// immediate depending on the opcode encoding, and the result is written to gpr0.
+// Returns pc unchanged.
+u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::I2I_R:
+    case OpCode::Id::I2I_C:
+    case OpCode::Id::I2I_IMM: {
+        // Integer to integer conversion
+        const bool src_signed = instr.conversion.is_input_signed;
+        const bool dst_signed = instr.conversion.is_output_signed;
+        const Register::Size src_size = instr.conversion.src_size;
+        const Register::Size dst_size = instr.conversion.dst_size;
+        const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
+
+        Node value = [this, instr, opcode] {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::I2I_R:
+                return GetRegister(instr.gpr20);
+            case OpCode::Id::I2I_C:
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            case OpCode::Id::I2I_IMM:
+                return Immediate(instr.alu.GetSignedImm20_20());
+            default:
+                UNREACHABLE();
+                return Immediate(0);
+            }
+        }();
+
+        // Ensure the source selector is valid
+        switch (instr.conversion.src_size) {
+        case Register::Size::Byte:
+            break;
+        case Register::Size::Short:
+            ASSERT(selector == 0 || selector == 2);
+            break;
+        default:
+            ASSERT(selector == 0);
+            break;
+        }
+
+        // The selector is a byte index inside the source word; extract the selected field
+        // unless the whole word is used.
+        if (src_size != Register::Size::Word || selector != 0) {
+            value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
+                                    Immediate(selector * 8), Immediate(SizeInBits(src_size)));
+        }
+
+        value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
+                                        instr.conversion.negate_a, src_signed);
+
+        if (instr.alu.saturate_d) {
+            if (src_signed && !dst_signed) {
+                // Signed to unsigned: values with the source sign bit (or anything above it)
+                // set are clamped to zero. The shift uses an unsigned literal so that a 32-bit
+                // source does not overflow a signed int.
+                // NOTE(review): a Long source would still shift by 63 here - confirm it cannot
+                // reach this path.
+                Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
+                                             Immediate(1U << (SizeInBits(src_size) - 1)));
+                value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
+                                  std::move(value));
+
+                // Simplify generated expressions, this can be removed without semantic impact
+                SetTemporary(bb, 0, std::move(value));
+                value = GetTemporary(0);
+
+                if (dst_size != Register::Size::Word) {
+                    // Clamp to the largest value representable in the destination size.
+                    // NOTE(review): a Long destination would shift by 64 here - confirm it
+                    // cannot reach this path.
+                    const Node limit = Immediate((1U << SizeInBits(dst_size)) - 1);
+                    Node is_large =
+                        Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
+                    value = Operation(OperationCode::Select, std::move(is_large), limit,
+                                      std::move(value));
+                }
+            } else if (const std::optional bounds =
+                           IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
+                // Remaining combinations are clamped to a [min, max] range
+                value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
+                                        Immediate(bounds->first));
+                value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
+                                        Immediate(bounds->second));
+            }
+        } else if (dst_size != Register::Size::Word) {
+            // No saturation, we only have to mask the result
+            Node mask = Immediate((1U << SizeInBits(dst_size)) - 1);
+            value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
+        }
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, std::move(value));
+        break;
+    }
+    case OpCode::Id::I2F_R:
+    case OpCode::Id::I2F_C:
+    case OpCode::Id::I2F_IMM: {
+        // Integer to float conversion
+        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in I2F is not implemented");
+
+        Node value = [&] {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::I2F_R:
+                return GetRegister(instr.gpr20);
+            case OpCode::Id::I2F_C:
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            case OpCode::Id::I2F_IMM:
+                return Immediate(instr.alu.GetSignedImm20_20());
+            default:
+                UNREACHABLE();
+                return Immediate(0);
+            }
+        }();
+
+        const bool input_signed = instr.conversion.is_input_signed;
+
+        // A non-zero selector picks a byte offset inside the source word; shift it down so the
+        // selected field starts at bit zero.
+        if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
+            ASSERT(instr.conversion.src_size == Register::Size::Byte ||
+                   instr.conversion.src_size == Register::Size::Short);
+            if (instr.conversion.src_size == Register::Size::Short) {
+                ASSERT(offset == 0 || offset == 2);
+            }
+            value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
+                                    std::move(value), Immediate(offset * 8));
+        }
+
+        // 'abs' is applied on the integer side, negation on the float side after the cast.
+        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
+        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
+        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
+        value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+
+        if (instr.conversion.dst_size == Register::Size::Short) {
+            value = Operation(OperationCode::HCastFloat, PRECISE, value);
+        }
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::F2F_R:
+    case OpCode::Id::F2F_C:
+    case OpCode::Id::F2F_IMM: {
+        // Float to float conversion
+        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
+        UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in F2F is not implemented");
+
+        Node value = [&]() {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::F2F_R:
+                return GetRegister(instr.gpr20);
+            case OpCode::Id::F2F_C:
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            case OpCode::Id::F2F_IMM:
+                return GetImmediate19(instr);
+            default:
+                UNREACHABLE();
+                return Immediate(0);
+            }
+        }();
+
+        // Half sources select which half of the register is converted
+        if (instr.conversion.src_size == Register::Size::Short) {
+            value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
+                              std::move(value));
+        } else {
+            ASSERT(instr.conversion.float_src.selector == 0);
+        }
+
+        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
+
+        value = [&] {
+            if (instr.conversion.src_size != instr.conversion.dst_size) {
+                // Rounding operations only matter when the source and destination conversion size
+                // is the same.
+                return value;
+            }
+            switch (instr.conversion.f2f.GetRoundingMode()) {
+            case Tegra::Shader::F2fRoundingOp::None:
+                return value;
+            case Tegra::Shader::F2fRoundingOp::Round:
+                return Operation(OperationCode::FRoundEven, value);
+            case Tegra::Shader::F2fRoundingOp::Floor:
+                return Operation(OperationCode::FFloor, value);
+            case Tegra::Shader::F2fRoundingOp::Ceil:
+                return Operation(OperationCode::FCeil, value);
+            case Tegra::Shader::F2fRoundingOp::Trunc:
+                return Operation(OperationCode::FTrunc, value);
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
+                                  instr.conversion.f2f.rounding.Value());
+                return value;
+            }
+        }();
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+
+        if (instr.conversion.dst_size == Register::Size::Short) {
+            value = Operation(OperationCode::HCastFloat, PRECISE, value);
+        }
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::F2I_R:
+    case OpCode::Id::F2I_C:
+    case OpCode::Id::F2I_IMM: {
+        // Float to integer conversion
+        UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in F2I is not implemented");
+        Node value = [&]() {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::F2I_R:
+                return GetRegister(instr.gpr20);
+            case OpCode::Id::F2I_C:
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            case OpCode::Id::F2I_IMM:
+                return GetImmediate19(instr);
+            default:
+                UNREACHABLE();
+                return Immediate(0);
+            }
+        }();
+
+        // Half sources select which half of the register is converted
+        if (instr.conversion.src_size == Register::Size::Short) {
+            value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
+                              std::move(value));
+        } else {
+            ASSERT(instr.conversion.float_src.selector == 0);
+        }
+
+        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
+
+        // Round in floating point first, then cast the rounded value to an integer
+        value = [&]() {
+            switch (instr.conversion.f2i.rounding) {
+            case Tegra::Shader::F2iRoundingOp::RoundEven:
+                return Operation(OperationCode::FRoundEven, PRECISE, value);
+            case Tegra::Shader::F2iRoundingOp::Floor:
+                return Operation(OperationCode::FFloor, PRECISE, value);
+            case Tegra::Shader::F2iRoundingOp::Ceil:
+                return Operation(OperationCode::FCeil, PRECISE, value);
+            case Tegra::Shader::F2iRoundingOp::Trunc:
+                return Operation(OperationCode::FTrunc, PRECISE, value);
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
+                                  instr.conversion.f2i.rounding.Value());
+                return Immediate(0);
+            }
+        }();
+        const bool is_signed = instr.conversion.is_output_signed;
+        value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
+        value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -0,0 +1,62 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes the FFMA (fused multiply-add) instruction family, computing op_a * op_b + op_c, and
+// appends the generated IR to bb. Returns pc unchanged.
+u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
+    if (instr.ffma.tab5980_0 != 1) {
+        LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+    }
+    if (instr.ffma.tab5980_1 != 0) {
+        LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+    }
+
+    const Node op_a = GetRegister(instr.gpr8);
+
+    // The source of the multiplier (op_b) and the addend (op_c) depends on the encoding.
+    Node op_b;
+    Node op_c;
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::FFMA_CR:
+        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        op_c = GetRegister(instr.gpr39);
+        break;
+    case OpCode::Id::FFMA_RR:
+        op_b = GetRegister(instr.gpr20);
+        op_c = GetRegister(instr.gpr39);
+        break;
+    case OpCode::Id::FFMA_RC:
+        op_b = GetRegister(instr.gpr39);
+        op_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        break;
+    case OpCode::Id::FFMA_IMM:
+        op_b = GetImmediate19(instr);
+        op_c = GetRegister(instr.gpr39);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
+        op_b = Immediate(0);
+        op_c = Immediate(0);
+        break;
+    }
+
+    // Only negation is applied to the second and third operands; 'abs' is never requested.
+    op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
+    op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
+
+    Node fma = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
+    fma = GetSaturatedFloat(fma, instr.alu.saturate_d);
+
+    SetInternalFlagsFromFloat(bb, fma, instr.generates_cc);
+    SetRegister(bb, instr.gpr0, fma);
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -0,0 +1,58 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes the FSET instruction and appends the generated IR to bb.
+// FSET compares two floats, combines the comparison with a second predicate and writes a
+// true/false constant to gpr0. Returns pc unchanged.
+u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+
+    const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
+                                           instr.fset.neg_a != 0);
+
+    // The second operand is an immediate, a register or a const buffer entry.
+    Node rhs;
+    if (instr.is_b_imm) {
+        rhs = GetImmediate19(instr);
+    } else if (instr.is_b_gpr) {
+        rhs = GetRegister(instr.gpr20);
+    } else {
+        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+    }
+    rhs = GetOperandAbsNegFloat(rhs, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
+
+    const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
+    const Node comparison = GetPredicateComparisonFloat(instr.fset.cond, lhs, rhs);
+
+    const Node condition = Operation(combiner, comparison, second_pred);
+
+    // With the bf bit set the result is the float 1.0 or 0.0, otherwise it is the integer -1
+    // (all bits set) or 0.
+    const bool is_float_result = instr.fset.bf != 0;
+    Node true_value;
+    Node false_value;
+    if (is_float_result) {
+        true_value = Immediate(1.0f);
+        false_value = Immediate(0.0f);
+    } else {
+        true_value = Immediate(-1);
+        false_value = Immediate(0);
+    }
+    const Node selected =
+        Operation(OperationCode::Select, PRECISE, condition, true_value, false_value);
+
+    // Flags are derived from the result interpreted with the matching type.
+    if (is_float_result) {
+        SetInternalFlagsFromFloat(bb, selected, instr.generates_cc);
+    } else {
+        SetInternalFlagsFromInteger(bb, selected, instr.generates_cc);
+    }
+    SetRegister(bb, instr.gpr0, selected);
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -0,0 +1,57 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+
+// Decodes the FSETP instruction and appends the generated IR to bb.
+// FSETP compares two floats, combines the comparison with a second predicate and stores the
+// outcome in pred3. When pred0 is in use it receives the same combination computed with the
+// negated comparison. Returns pc unchanged.
+u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+
+    Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
+                                     instr.fsetp.neg_a != 0);
+
+    // The second operand is an immediate, a register or a const buffer entry.
+    Node rhs;
+    if (instr.is_b_imm) {
+        rhs = GetImmediate19(instr);
+    } else if (instr.is_b_gpr) {
+        rhs = GetRegister(instr.gpr20);
+    } else {
+        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+    }
+    rhs = GetOperandAbsNegFloat(std::move(rhs), instr.fsetp.abs_b, instr.fsetp.neg_b);
+
+    // We can't use the constant predicate as destination.
+    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+    const Node comparison =
+        GetPredicateComparisonFloat(instr.fsetp.cond, std::move(lhs), std::move(rhs));
+    const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
+
+    // Primary predicate: Comparison OP SecondPredicate
+    const Node primary = Operation(combiner, comparison, second_pred);
+    SetPredicate(bb, instr.fsetp.pred3, primary);
+
+    // Secondary predicate (only when enabled): !Comparison OP SecondPredicate
+    const bool has_secondary = instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
+    if (has_secondary) {
+        const Node negated = Operation(OperationCode::LogicalNegate, comparison);
+        const Node secondary = Operation(combiner, negated, second_pred);
+        SetPredicate(bb, instr.fsetp.pred0, secondary);
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -0,0 +1,115 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using std::move;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
+
+// Decodes the HSET2 instruction family and appends the generated IR to bb.
+// HSET2 compares two packed pairs of half floats, combines each comparison with a second
+// predicate and writes a per-half true/false constant to gpr0. Returns pc unchanged.
+u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    // Modifier bits live at different positions depending on the encoding. They are
+    // value-initialized so that nothing indeterminate is read if an unexpected opcode falls
+    // through the UNREACHABLE() below.
+    PredCondition cond{};
+    bool bf{};
+    bool ftz{};
+    bool neg_a{};
+    bool abs_a{};
+    bool neg_b{};
+    bool abs_b{};
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::HSET2_C:
+    case OpCode::Id::HSET2_IMM:
+        cond = instr.hsetp2.cbuf_and_imm.cond;
+        bf = instr.Bit(53);
+        ftz = instr.Bit(54);
+        neg_a = instr.Bit(43);
+        abs_a = instr.Bit(44);
+        neg_b = instr.Bit(56);
+        // NOTE(review): abs_b reads the same bit as ftz (54) - confirm against the ISA encoding.
+        abs_b = instr.Bit(54);
+        break;
+    case OpCode::Id::HSET2_R:
+        cond = instr.hsetp2.reg.cond;
+        bf = instr.Bit(49);
+        ftz = instr.Bit(50);
+        neg_a = instr.Bit(43);
+        abs_a = instr.Bit(44);
+        neg_b = instr.Bit(31);
+        abs_b = instr.Bit(30);
+        break;
+    default:
+        UNREACHABLE();
+    }
+
+    Node op_b = [this, instr, opcode] {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HSET2_C:
+            // Inform as unimplemented as this is not tested.
+            UNIMPLEMENTED_MSG("HSET2_C is not implemented");
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+        case OpCode::Id::HSET2_R:
+            return GetRegister(instr.gpr20);
+        case OpCode::Id::HSET2_IMM:
+            return UnpackHalfImmediate(instr, true);
+        default:
+            UNREACHABLE();
+            return Node{};
+        }
+    }();
+
+    if (!ftz) {
+        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+    }
+
+    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
+    op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
+
+    // Only the register form applies abs/neg to the second operand; the register and const
+    // buffer forms are then unpacked according to type_b. The immediate form was already
+    // unpacked above.
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::HSET2_R:
+        op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
+        [[fallthrough]];
+    case OpCode::Id::HSET2_C:
+        op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
+        break;
+    default:
+        break;
+    }
+
+    Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+
+    Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
+
+    // HSET2 operates on each half float in the pack.
+    std::array<Node, 2> values;
+    for (u32 i = 0; i < 2; ++i) {
+        // With bf the true value is the half float 1.0 (0x3c00), otherwise all 16 bits are set.
+        // It is shifted into the 16-bit lane that corresponds to the half being processed.
+        const u32 raw_value = bf ? 0x3c00 : 0xffff;
+        Node true_value = Immediate(raw_value << (i * 16));
+        Node false_value = Immediate(0);
+
+        Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
+        Node predicate = Operation(combiner, comparison, second_pred);
+        values[i] =
+            Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
+    }
+
+    // Merge both lanes into the packed result.
+    Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
+    SetRegister(bb, instr.gpr0, move(value));
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -0,0 +1,80 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+
+u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Handles HSETP2_C, HSETP2_IMM and HSETP2_R: a half float comparison writing two predicates.
+    if (instr.hsetp2.ftz != 0) {
+        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+    }
+    // NOTE(review): the check above fires when the ftz bit is set, yet it logs "without FTZ".
+    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
+    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
+    // The condition, the H_AND flag and operand B are encoded differently per opcode variant.
+    Tegra::Shader::PredCondition cond{};
+    bool h_and{};
+    Node op_b{};
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::HSETP2_C:
+        cond = instr.hsetp2.cbuf_and_imm.cond;
+        h_and = instr.hsetp2.cbuf_and_imm.h_and;
+        op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+                                    instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
+        // F32 is hardcoded in hardware
+        op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
+        break;
+    case OpCode::Id::HSETP2_IMM:
+        cond = instr.hsetp2.cbuf_and_imm.cond;
+        h_and = instr.hsetp2.cbuf_and_imm.h_and;
+        op_b = UnpackHalfImmediate(instr, true);
+        break;
+    case OpCode::Id::HSETP2_R:
+        cond = instr.hsetp2.reg.cond;
+        h_and = instr.hsetp2.reg.h_and;
+        op_b =
+            GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
+                                 instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
+        break;
+    default:
+        UNREACHABLE();
+        op_b = Immediate(0);
+    }
+    // Every written predicate is 'op' applied to a comparison result and predicate pred39.
+    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
+    const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
+
+    const auto Write = [&](u64 dest, Node src) {
+        SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
+    };
+    // H_AND: pred3 <- LogicalAnd2(comparison), pred0 <- its negation; else elements 0 and 1.
+    const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
+    const u64 first = instr.hsetp2.pred3;
+    const u64 second = instr.hsetp2.pred0;
+    if (h_and) {
+        Node joined = Operation(OperationCode::LogicalAnd2, comparison);
+        Write(first, joined);
+        Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
+    } else {
+        Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
+        Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -0,0 +1,73 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::HalfPrecision;
+using Tegra::Shader::HalfType;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Handles HFMA2_CR/RC/RR/IMM_R: gpr0 = HalfMerge(gpr0, saturate(HFma(a, b, c))).
+    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
+        DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
+    } else {
+        DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
+    }
+    // Operand sources and flag bit positions depend on the opcode variant; gather them here.
+    constexpr auto identity = HalfType::H0_H1;
+    bool neg_b{}, neg_c{}; // written by the lambda below; IMM_R leaves neg_b false
+    auto [saturate, type_b, op_b, type_c,
+          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HFMA2_CR:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, HalfType::F32,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_RC:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
+                    HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+        case OpCode::Id::HFMA2_RR:
+            neg_b = instr.hfma2.rr.negate_b;
+            neg_c = instr.hfma2.rr.negate_c;
+            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
+                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_IMM_R:
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
+                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
+        default: // any other opcode silently decodes as 0 * a + 0 without saturation
+            return {false, identity, Immediate(0), identity, Immediate(0)};
+        }
+    }();
+    // Constant buffer operands are unpacked as F32; negation is applied after unpacking.
+    const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
+    op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
+    op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
+
+    Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
+    value = GetSaturatedHalfFloat(value, saturate);
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
+
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -0,0 +1,536 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <vector>
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
+using Tegra::Shader::StoreType;
+using Tegra::Texture::ComponentType;
+using Tegra::Texture::TextureFormat;
+using Tegra::Texture::TICEntry;
+
+namespace {
+
+ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
+                               std::size_t component) {
+    const TextureFormat format{descriptor.format};
+    switch (format) { // Per-format mapping from component index to the r/g/b/a type field.
+    case TextureFormat::R16G16B16A16:
+    case TextureFormat::R32G32B32A32:
+    case TextureFormat::R32G32B32:
+    case TextureFormat::R32G32:
+    case TextureFormat::R16G16:
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1:
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.b_type;
+        }
+        if (component == 3) {
+            return descriptor.a_type;
+        }
+        break;
+    case TextureFormat::A8R8G8B8:
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.b_type;
+        }
+        break;
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5:
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::R32_B24G8:
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        break;
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5:
+    case TextureFormat::B10G11R11:
+        if (component == 0) {
+            return descriptor.b_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::R24G8:
+    case TextureFormat::R8G24:
+    case TextureFormat::R8G8:
+    case TextureFormat::G4R4:
+        if (component == 0) { // these two-channel formats report G first, then R
+            return descriptor.g_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        break;
+    default:
+        break;
+    } // Reached for unlisted formats and for indices a listed format does not map.
+    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+    return ComponentType::FLOAT; // fallback returned after the assertion above
+}
+
+bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
+    // Maps a 4-bit selector to the channels it enables (bit 0 = R ... bit 3 = A); each entry
+    // equals its index. A selector above 15 or a component above 3 throws std::out_of_range.
+    constexpr u8 R = 1 << 0, G = 1 << 1, B = 1 << 2, A = 1 << 3;
+    constexpr std::array<u8, 16> enabled_bits = {
+        0, R,     G,     R | G,     B,     R | B,     G | B,     R | G | B,
+        A, R | A, G | A, R | G | A, B | A, R | B | A, G | B | A, R | G | B | A};
+    const std::bitset<4> channels{enabled_bits.at(component_mask)};
+    return channels.test(component);
+}
+
+u32 GetComponentSize(TextureFormat format, std::size_t component) {
+    switch (format) { // Size in bits of the component at the given index, per format.
+    case TextureFormat::R32G32B32A32:
+        return 32;
+    case TextureFormat::R16G16B16A16:
+        return 16;
+    case TextureFormat::R32G32B32:
+        return component <= 2 ? 32 : 0; // 0: the format has no component at that index
+    case TextureFormat::R32G32:
+        return component <= 1 ? 32 : 0;
+    case TextureFormat::R16G16:
+        return component <= 1 ? 16 : 0;
+    case TextureFormat::R32:
+        return component == 0 ? 32 : 0;
+    case TextureFormat::R16:
+        return component == 0 ? 16 : 0;
+    case TextureFormat::R8:
+        return component == 0 ? 8 : 0;
+    case TextureFormat::R1:
+        return component == 0 ? 1 : 0;
+    case TextureFormat::A8R8G8B8:
+        return 8;
+    case TextureFormat::A2B10G10R10:
+        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
+    case TextureFormat::A4B4G4R4:
+        return 4;
+    case TextureFormat::A5B5G5R1:
+        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
+    case TextureFormat::A1B5G5R5:
+        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
+    case TextureFormat::R32_B24G8:
+        if (component == 0) {
+            return 32;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        if (component == 2) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::B5G6R5:
+        if (component == 0 || component == 2) {
+            return 5;
+        }
+        if (component == 1) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::B6G5R5:
+        if (component == 1 || component == 2) {
+            return 5;
+        }
+        if (component == 0) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::B10G11R11:
+        if (component == 1 || component == 2) {
+            return 11;
+        }
+        if (component == 0) {
+            return 10;
+        }
+        return 0;
+    case TextureFormat::R24G8:
+        if (component == 0) { // index 0 is the 8-bit channel, index 1 the 24-bit one
+            return 8;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        return 0;
+    case TextureFormat::R8G24:
+        if (component == 0) { // index 0 is the 24-bit channel, index 1 the 8-bit one
+            return 24;
+        }
+        if (component == 1) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::R8G8:
+        return (component == 0 || component == 1) ? 8 : 0;
+    case TextureFormat::G4R4:
+        return (component == 0 || component == 1) ? 4 : 0;
+    default: // unlisted formats assert and report a zero-sized component
+        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+        return 0;
+    }
+}
+
+std::size_t GetImageComponentMask(TextureFormat format) {
+    // Bitmask of the channels a format stores: bit 0 = R, bit 1 = G, bit 2 = B, bit 3 = A.
+    // Formats without a dedicated case hit UNIMPLEMENTED_MSG and report all four channels.
+    // The masks are spelled out as the four combinations this table actually uses.
+    enum : std::size_t { R = 0b0001, RG = 0b0011, RGB = 0b0111, RGBA = 0b1111 };
+    switch (format) {
+    case TextureFormat::R1:
+    case TextureFormat::R8:
+    case TextureFormat::R16:
+    case TextureFormat::R32:
+        return R;
+    case TextureFormat::G4R4:
+    case TextureFormat::R8G8:
+    case TextureFormat::R8G24:
+    case TextureFormat::R24G8:
+    case TextureFormat::R16G16:
+    case TextureFormat::R32G32:
+        return RG;
+    case TextureFormat::B10G11R11:
+    case TextureFormat::B6G5R5:
+    case TextureFormat::B5G6R5:
+    case TextureFormat::R32_B24G8:
+    case TextureFormat::R32G32B32:
+        return RGB;
+    case TextureFormat::A1B5G5R5:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A8R8G8B8:
+    case TextureFormat::R16G16B16A16:
+    case TextureFormat::R32G32B32A32:
+        return RGBA;
+    default:
+        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+        return RGBA;
+    }
+}
+
+std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
+    switch (image_type) { // Coordinate count per image type; array types take one extra.
+    case Tegra::Shader::ImageType::TextureBuffer:
+    case Tegra::Shader::ImageType::Texture1D:
+        return 1;
+    case Tegra::Shader::ImageType::Texture2D:
+    case Tegra::Shader::ImageType::Texture1DArray:
+        return 2;
+    case Tegra::Shader::ImageType::Texture3D:
+    case Tegra::Shader::ImageType::Texture2DArray:
+        return 3;
+    }
+    UNREACHABLE(); // only hit by a value outside the enumerators listed above
+    return 1;
+}
+} // Anonymous namespace
+
+std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
+                                                  Node original_value) {
+    // Converts a loaded image component into the integer form that is packed into a register;
+    // the returned bool tells the caller whether to treat the value as signed.
+    // 2^size is built with a 64-bit shift since '1 << 32' on an int is undefined behaviour.
+    const float levels = static_cast<float>(u64{1} << component_size);
+    switch (component_type) {
+    case ComponentType::SNORM: {
+        // range [-1.0, 1.0]: scale by 2^(size - 1) - 1, convert, keep the low 'size' bits
+        auto cnv = Operation(OperationCode::FMul, original_value, Immediate(levels / 2.f - 1.f));
+        cnv = Operation(OperationCode::ICastFloat, std::move(cnv));
+        return {BitfieldExtract(std::move(cnv), 0, component_size), true};
+    }
+    case ComponentType::SINT:
+    case ComponentType::UNORM: {
+        const bool is_signed = component_type == ComponentType::SINT;
+        // range [0.0, 1.0]: scale by 2^size - 1 (SINT shares this path with UNORM)
+        auto cnv = Operation(OperationCode::FMul, original_value, Immediate(levels - 1.f));
+        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv)), is_signed};
+    }
+    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
+        return {std::move(original_value), false};
+    case ComponentType::FLOAT: // 16-bit floats go through HCastFloat, others pass unchanged
+        if (component_size == 16) {
+            return {Operation(OperationCode::HCastFloat, original_value), true};
+        }
+        return {std::move(original_value), true};
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
+        return {std::move(original_value), true};
+    }
+}
+
+u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Handles SULD (image load), SUST (image store) and SUATOM (image atomic).
+    const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
+        std::vector<Node> coords; // one register per coordinate, counted up from gpr8
+        const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
+        coords.reserve(num_coords);
+        for (std::size_t i = 0; i < num_coords; ++i) {
+            coords.push_back(GetRegister(instr.gpr8.Value() + i));
+        }
+        return coords;
+    };
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::SULD: {
+        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
+                         Tegra::Shader::OutOfBoundsStore::Ignore);
+
+        const auto type{instr.suldst.image_type};
+        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
+                                              : GetBindlessImage(instr.gpr39, type)};
+        image.MarkRead();
+        // P mode: each enabled component is loaded as-is into consecutive registers from gpr0.
+        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
+            u32 indexer = 0;
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.suldst.IsComponentEnabled(element)) {
+                    continue;
+                }
+                MetaImage meta{image, {}, element};
+                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
+                SetTemporary(bb, indexer++, std::move(value));
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
+            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
+                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
+            // Fetch the sampler descriptor of the bound or bindless image to learn its format.
+            auto descriptor = [this, instr] {
+                std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
+                if (instr.suldst.is_immediate) {
+                    sampler_descriptor =
+                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
+                } else {
+                    const Node image_register = GetRegister(instr.gpr39);
+                    const auto result = TrackCbuf(image_register, global_code,
+                                                  static_cast<s64>(global_code.size()));
+                    const auto buffer = std::get<1>(result);
+                    const auto offset = std::get<2>(result);
+                    sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
+                }
+                if (!sampler_descriptor) {
+                    UNREACHABLE_MSG("Failed to obtain image descriptor");
+                }
+                return *sampler_descriptor; // NOTE(review): dereferenced even when empty
+            }();
+
+            const auto comp_mask = GetImageComponentMask(descriptor.format);
+            // Components are converted and packed, lowest bits first, into 32-bit words.
+            switch (instr.suldst.GetStoreDataLayout()) {
+            case StoreType::Bits32:
+            case StoreType::Bits64: {
+                u32 indexer = 0;
+                u32 shifted_counter = 0;
+                Node value = Immediate(0);
+                for (u32 element = 0; element < 4; ++element) {
+                    if (!IsComponentEnabled(comp_mask, element)) {
+                        continue;
+                    }
+                    const auto component_type = GetComponentType(descriptor, element);
+                    const auto component_size = GetComponentSize(descriptor.format, element);
+                    MetaImage meta{image, {}, element};
+
+                    auto [converted_value, is_signed] = GetComponentValue(
+                        component_type, component_size,
+                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
+
+                    // shift element to correct position
+                    const auto shifted = shifted_counter;
+                    if (shifted > 0) {
+                        converted_value =
+                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
+                                            std::move(converted_value), Immediate(shifted));
+                    }
+                    shifted_counter += component_size;
+
+                    // add value into result
+                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
+                    // NOTE(review): a trailing partial word (< 32 bits) is never stored.
+                    // once 32 bits are packed -> save the word into a temporary
+                    if (shifted_counter >= 32) {
+                        SetTemporary(bb, indexer++, std::move(value));
+                        // reset counter and value to start packing the next word
+                        value = Immediate(0);
+                        shifted_counter = 0;
+                    }
+                }
+                for (u32 i = 0; i < indexer; ++i) {
+                    SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+                }
+                break;
+            }
+            default:
+                UNREACHABLE();
+                break;
+            }
+        }
+        break;
+    }
+    case OpCode::Id::SUST: {
+        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
+        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
+                         Tegra::Shader::OutOfBoundsStore::Ignore);
+        UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
+        // The values to store are always read from four consecutive registers at gpr0.
+        std::vector<Node> values;
+        constexpr std::size_t hardcoded_size{4};
+        for (std::size_t i = 0; i < hardcoded_size; ++i) {
+            values.push_back(GetRegister(instr.gpr0.Value() + i));
+        }
+
+        const auto type{instr.suldst.image_type};
+        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
+                                              : GetBindlessImage(instr.gpr39, type)};
+        image.MarkWrite();
+
+        MetaImage meta{image, std::move(values)};
+        bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
+        break;
+    }
+    case OpCode::Id::SUATOM: {
+        UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
+        // Only S32/U32 Add/And/Or/Xor/Exch are mapped; anything else asserts and yields Add.
+        const OperationCode operation_code = [instr] {
+            switch (instr.suatom_d.operation_type) {
+            case Tegra::Shader::ImageAtomicOperationType::S32:
+            case Tegra::Shader::ImageAtomicOperationType::U32:
+                switch (instr.suatom_d.operation) {
+                case Tegra::Shader::ImageAtomicOperation::Add:
+                    return OperationCode::AtomicImageAdd;
+                case Tegra::Shader::ImageAtomicOperation::And:
+                    return OperationCode::AtomicImageAnd;
+                case Tegra::Shader::ImageAtomicOperation::Or:
+                    return OperationCode::AtomicImageOr;
+                case Tegra::Shader::ImageAtomicOperation::Xor:
+                    return OperationCode::AtomicImageXor;
+                case Tegra::Shader::ImageAtomicOperation::Exch:
+                    return OperationCode::AtomicImageExchange;
+                default:
+                    break;
+                }
+                break;
+            default:
+                break;
+            }
+            UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
+                              static_cast<u64>(instr.suatom_d.operation.Value()),
+                              static_cast<u64>(instr.suatom_d.operation_type.Value()));
+            return OperationCode::AtomicImageAdd;
+        }();
+        // gpr0 provides the operand and is overwritten with the operation's result.
+        Node value = GetRegister(instr.gpr0);
+
+        const auto type = instr.suatom_d.image_type;
+        auto& image = GetImage(instr.image, type); // always the bound lookup, no bindless path
+        image.MarkAtomic();
+
+        MetaImage meta{image, {std::move(value)}};
+        SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
+    // Returns the entry registered for this image index, creating a bound entry on first use.
+    const u32 image_offset = static_cast<u32>(image.index.Value());
+    for (ImageEntry& known : used_images) {
+        if (known.offset == image_offset) {
+            // A match must not be bindless and must have been registered with the same type.
+            ASSERT(!known.is_bindless && known.type == type);
+            return known;
+        }
+    }
+    // New entries are numbered by the count of images registered so far.
+    const u32 new_index = static_cast<u32>(used_images.size());
+    return used_images.emplace_back(new_index, image_offset, type);
+}
+
+ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
+    // Returns the entry registered for the (buffer, offset) pair that TrackCbuf reports for the
+    // handle register, creating a bindless entry on first use.
+    // NOTE(review): element 0 of the TrackCbuf result is ignored - confirm failures are handled.
+    const Node handle = GetRegister(reg);
+    const auto tracked = TrackCbuf(handle, global_code, static_cast<s64>(global_code.size()));
+    const auto cbuf_index = std::get<1>(tracked);
+    const auto cbuf_offset = std::get<2>(tracked);
+
+    for (ImageEntry& known : used_images) {
+        if (known.buffer == cbuf_index && known.offset == cbuf_offset) {
+            // A match must already be bindless and registered with the same type.
+            ASSERT(known.is_bindless && known.type == type);
+            return known;
+        }
+    }
+
+    const auto new_index = static_cast<u32>(used_images.size());
+    return used_images.emplace_back(new_index, cbuf_offset, cbuf_index, type);
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -0,0 +1,49 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+
+    // ISET: gpr0 = ((A cmp B) op pred39) ? true value : zero, with integer operands A and B.
+    const Node lhs = GetRegister(instr.gpr8);
+    Node rhs;
+    if (instr.is_b_imm) {
+        rhs = Immediate(instr.alu.GetSignedImm20_20());
+    } else if (instr.is_b_gpr) {
+        rhs = GetRegister(instr.gpr20);
+    } else {
+        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+    }
+
+    const Node extra_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
+    const Node compare =
+        GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, lhs, rhs);
+    const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
+    const Node condition = Operation(combiner, compare, extra_pred);
+    // The bf bit selects a float result (1.0f / 0.0f) instead of an integer one (-1 / 0).
+    Node when_true;
+    Node when_false;
+    if (instr.iset.bf) {
+        when_true = Immediate(1.0f);
+        when_false = Immediate(0.0f);
+    } else {
+        when_true = Immediate(-1);
+        when_false = Immediate(0);
+    }
+    SetRegister(bb, instr.gpr0,
+                Operation(OperationCode::Select, PRECISE, condition, when_true, when_false));
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -0,0 +1,53 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+
+u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+
+    // ISETP: pred3 = (A cmp B) op pred39 and, when pred0 is a real destination,
+    // pred0 = !(A cmp B) op pred39.
+    const Node lhs = GetRegister(instr.gpr8);
+    Node rhs;
+    if (instr.is_b_imm) {
+        rhs = Immediate(instr.alu.GetSignedImm20_20());
+    } else if (instr.is_b_gpr) {
+        rhs = GetRegister(instr.gpr20);
+    } else {
+        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+    }
+
+    // The primary destination must be a writable predicate, never the constant one.
+    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+    const Node extra_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
+    const Node compare =
+        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, lhs, rhs);
+    const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
+
+    // Primary predicate: comparison OP second predicate.
+    SetPredicate(bb, instr.isetp.pred3, Operation(combiner, compare, extra_pred));
+
+    // Secondary predicate, if pred0 is not the unused index: !comparison OP second predicate.
+    const bool has_secondary = instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
+    if (has_secondary) {
+        const Node inverted = Operation(OperationCode::LogicalNegate, compare);
+        SetPredicate(bb, instr.isetp.pred0, Operation(combiner, inverted, extra_pred));
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -0,0 +1,492 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using std::move;
+using Tegra::Shader::AtomicOp;
+using Tegra::Shader::AtomicType;
+using Tegra::Shader::Attribute;
+using Tegra::Shader::GlobalAtomicType;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+using Tegra::Shader::StoreType;
+
+namespace {
+
+/// Maps a shader atomic operation to the matching integer atomic IR operation code.
+/// Operations without a mapping are reported as unimplemented and fall back to AtomicIAdd.
+OperationCode GetAtomOperation(AtomicOp op) {
+    OperationCode code = OperationCode::AtomicIAdd;
+    switch (op) {
+    case AtomicOp::Add:
+        // Already the default.
+        break;
+    case AtomicOp::Min:
+        code = OperationCode::AtomicIMin;
+        break;
+    case AtomicOp::Max:
+        code = OperationCode::AtomicIMax;
+        break;
+    case AtomicOp::And:
+        code = OperationCode::AtomicIAnd;
+        break;
+    case AtomicOp::Or:
+        code = OperationCode::AtomicIOr;
+        break;
+    case AtomicOp::Xor:
+        code = OperationCode::AtomicIXor;
+        break;
+    case AtomicOp::Exch:
+        code = OperationCode::AtomicIExchange;
+        break;
+    default:
+        UNIMPLEMENTED_MSG("op={}", op);
+        break;
+    }
+    return code;
+}
+
+/// Returns true for the uniform types handled through the unaligned extract/insert helpers
+/// (UnsignedByte and UnsignedShort); false for every other type.
+bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
+    using Tegra::Shader::UniformType;
+    switch (uniform_type) {
+    case UniformType::UnsignedByte:
+    case UniformType::UnsignedShort:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/// Returns the address mask used to locate an unaligned value: 0b11 for UnsignedByte and 0b10
+/// for UnsignedShort. Any other type is flagged as unreachable and yields 0.
+u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
+    using Tegra::Shader::UniformType;
+    if (uniform_type == UniformType::UnsignedByte) {
+        return 0b11;
+    }
+    if (uniform_type == UniformType::UnsignedShort) {
+        return 0b10;
+    }
+    UNREACHABLE();
+    return 0;
+}
+
+/// Returns the size in bits associated with a uniform type.
+/// Unknown types are reported as unimplemented and treated as 32 bits wide.
+u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
+    using Tegra::Shader::UniformType;
+    u32 bits = 32;
+    switch (uniform_type) {
+    case UniformType::UnsignedByte:
+        bits = 8;
+        break;
+    case UniformType::UnsignedShort:
+        bits = 16;
+        break;
+    case UniformType::Single:
+        // Already the default.
+        break;
+    case UniformType::Double:
+        bits = 64;
+        break;
+    case UniformType::Quad:
+    case UniformType::UnsignedQuad:
+        bits = 128;
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
+        break;
+    }
+    return bits;
+}
+
+/// Builds a node that extracts `size` bits from `value`, starting at the bit position
+/// (address & mask) << 3.
+Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
+    Node masked_address = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
+    // Multiply by 8 (presumably turning a byte offset into a bit offset).
+    Node bit_offset =
+        Operation(OperationCode::ULogicalShiftLeft, move(masked_address), Immediate(3));
+    return Operation(OperationCode::UBitfieldExtract, move(value), move(bit_offset),
+                     Immediate(size));
+}
+
+/// Builds a node that inserts the low `size` bits of `value` into `dest` at the bit position
+/// (address & mask) << 3.
+Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
+    Node masked_address = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
+    // Multiply by 8 (presumably turning a byte offset into a bit offset).
+    Node bit_offset =
+        Operation(OperationCode::ULogicalShiftLeft, move(masked_address), Immediate(3));
+    return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(bit_offset),
+                     Immediate(size));
+}
+
+/// Builds a node that ORs 0xFFFF0000 into `value` when bit 15 of `value` is set, and ORs 0
+/// otherwise (sign extension of a 16-bit quantity).
+Node Sign16Extend(Node value) {
+    constexpr u32 sign_bit = 1U << 15;
+    Node masked = Operation(OperationCode::UBitwiseAnd, value, Immediate(sign_bit));
+    Node is_negative = Operation(OperationCode::LogicalUEqual, move(masked), Immediate(sign_bit));
+    Node upper_bits =
+        Operation(OperationCode::Select, is_negative, Immediate(0xFFFF0000), Immediate(0));
+    return Operation(OperationCode::UBitwiseOr, move(value), move(upper_bits));
+}
+
+} // Anonymous namespace
+
+/// Decodes the memory instruction located at pc and appends the generated IR nodes to bb.
+/// Handled opcodes: LD_A, LD_C, LD_L, LD_S, LD, LDG, ST_A, ST_L, ST_S, ST, STG, RED, ATOM,
+/// ATOMS and AL2P. Any other opcode is reported as unimplemented.
+/// @param bb Basic block receiving the generated nodes.
+/// @param pc Program counter of the instruction to decode.
+/// @return pc, unmodified.
+u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::LD_A: {
+        // Note: Shouldn't this be interp mode flat? As in no interpolation made.
+        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
+                             "Indirect attribute loads are not supported");
+        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
+                             "Unaligned attribute loads are not supported");
+        UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
+                                 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
+                             "Non-32 bits PHYS reads are not implemented");
+
+        const Node buffer{GetRegister(instr.gpr39)};
+
+        u64 next_element = instr.attribute.fmt20.element;
+        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+        const auto LoadNextElement = [&](u32 reg_offset) {
+            const Node attribute{instr.attribute.fmt20.IsPhysical()
+                                     ? GetPhysicalInputAttribute(instr.gpr8, buffer)
+                                     : GetInputAttribute(static_cast<Attribute::Index>(next_index),
+                                                         next_element, buffer)};
+
+            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
+
+            // Load the next attribute element into the following register. If the element
+            // to load goes beyond the vec4 size, load the first element of the next
+            // attribute.
+            next_element = (next_element + 1) % 4;
+            next_index = next_index + (next_element == 0 ? 1 : 0);
+        };
+
+        // The size field encodes the number of words minus one.
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            LoadNextElement(reg_offset);
+        }
+        break;
+    }
+    case OpCode::Id::LD_C: {
+        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
+
+        Node index = GetRegister(instr.gpr8);
+
+        const Node op_a =
+            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
+
+        switch (instr.ld_c.type.Value()) {
+        case Tegra::Shader::UniformType::Single:
+            SetRegister(bb, instr.gpr0, op_a);
+            break;
+
+        case Tegra::Shader::UniformType::Double: {
+            const Node op_b =
+                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
+
+            // Both words go through temporaries before any destination register is written.
+            SetTemporary(bb, 0, op_a);
+            SetTemporary(bb, 1, op_b);
+            SetRegister(bb, instr.gpr0, GetTemporary(0));
+            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
+        }
+        break;
+    }
+    case OpCode::Id::LD_L:
+        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
+        [[fallthrough]];
+    case OpCode::Id::LD_S: {
+        // Address = gpr8 + immediate + offset; offset must be a multiple of 4.
+        const auto GetAddress = [&](s32 offset) {
+            ASSERT(offset % 4 == 0);
+            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+            return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
+        };
+        // LD_S reads shared memory; LD_L (which falls through to here) reads local memory.
+        const auto GetMemory = [&](s32 offset) {
+            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
+                                                             : GetLocalMemory(GetAddress(offset));
+        };
+
+        switch (instr.ldst_sl.type.Value()) {
+        case StoreType::Signed16:
+            SetRegister(bb, instr.gpr0,
+                        Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
+            break;
+        case StoreType::Bits32:
+        case StoreType::Bits64:
+        case StoreType::Bits128: {
+            // Number of 32-bit words to load.
+            const u32 count = [&] {
+                switch (instr.ldst_sl.type.Value()) {
+                case StoreType::Bits32:
+                    return 1;
+                case StoreType::Bits64:
+                    return 2;
+                case StoreType::Bits128:
+                    return 4;
+                default:
+                    UNREACHABLE();
+                    return 0;
+                }
+            }();
+            // Load every word into a temporary first, then copy them to the registers.
+            for (u32 i = 0; i < count; ++i) {
+                SetTemporary(bb, i, GetMemory(i * 4));
+            }
+            for (u32 i = 0; i < count; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
+                              instr.ldst_sl.type.Value());
+        }
+        break;
+    }
+    case OpCode::Id::LD:
+    case OpCode::Id::LDG: {
+        // The type field lives in a different encoding for LD and LDG.
+        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::LD:
+                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
+                return instr.generic.type;
+            case OpCode::Id::LDG:
+                return instr.ldg.type;
+            default:
+                UNREACHABLE();
+                return {};
+            }
+        }();
+
+        const auto [real_address_base, base_address, descriptor] =
+            TrackGlobalMemory(bb, instr, true, false);
+
+        const u32 size = GetMemorySize(type);
+        // Number of 32-bit registers needed to hold the value.
+        const u32 count = Common::AlignUp(size, 32) / 32;
+        if (!real_address_base || !base_address) {
+            // Tracking failed, load zeroes.
+            for (u32 i = 0; i < count; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
+            }
+            break;
+        }
+
+        for (u32 i = 0; i < count; ++i) {
+            const Node it_offset = Immediate(i * 4);
+            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
+            Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+
+            // To handle unaligned loads get the bytes used to dereference global memory and extract
+            // those bytes from the loaded u32.
+            if (IsUnaligned(type)) {
+                gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
+            }
+
+            SetTemporary(bb, i, gmem);
+        }
+
+        for (u32 i = 0; i < count; ++i) {
+            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+        }
+        break;
+    }
+    case OpCode::Id::ST_A: {
+        // This is the attribute store path, so the diagnostics talk about stores.
+        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
+                             "Indirect attribute stores are not supported");
+        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
+                             "Unaligned attribute stores are not supported");
+
+        u64 element = instr.attribute.fmt20.element;
+        auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+        // The size field encodes the number of words minus one.
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            Node dest;
+            if (instr.attribute.fmt20.patch) {
+                const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
+                dest = MakeNode<PatchNode>(offset);
+            } else {
+                dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
+                                          GetRegister(instr.gpr39));
+            }
+            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
+
+            bb.push_back(Operation(OperationCode::Assign, dest, src));
+
+            // Store the following register into the next attribute element. If that element
+            // goes beyond the vec4 size, continue with the first element of the next attribute.
+            element = (element + 1) % 4;
+            index = index + (element == 0 ? 1 : 0);
+        }
+        break;
+    }
+    case OpCode::Id::ST_L:
+        LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
+        [[fallthrough]];
+    case OpCode::Id::ST_S: {
+        // Address = gpr8 + immediate + offset; offset must be a multiple of 4.
+        const auto GetAddress = [&](s32 offset) {
+            ASSERT(offset % 4 == 0);
+            const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
+        };
+
+        // ST_L targets local memory; ST_S targets shared memory.
+        const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
+        const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
+        const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
+
+        // Wider stores fall through to the narrower ones, writing the highest words first.
+        switch (instr.ldst_sl.type.Value()) {
+        case StoreType::Bits128:
+            (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
+            (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
+            [[fallthrough]];
+        case StoreType::Bits64:
+            (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
+            [[fallthrough]];
+        case StoreType::Bits32:
+            (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
+            break;
+        case StoreType::Signed16: {
+            // Read-modify-write: insert the 16-bit value into the currently stored word.
+            Node address = GetAddress(0);
+            Node memory = (this->*get_memory)(address);
+            (this->*set_memory)(
+                bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
+                              instr.ldst_sl.type.Value());
+        }
+        break;
+    }
+    case OpCode::Id::ST:
+    case OpCode::Id::STG: {
+        // The type field lives in a different encoding for ST and STG.
+        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::ST:
+                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
+                return instr.generic.type;
+            case OpCode::Id::STG:
+                return instr.stg.type;
+            default:
+                UNREACHABLE();
+                return {};
+            }
+        }();
+
+        // For unaligned reads we have to read memory too.
+        const bool is_read = IsUnaligned(type);
+        const auto [real_address_base, base_address, descriptor] =
+            TrackGlobalMemory(bb, instr, is_read, true);
+        if (!real_address_base || !base_address) {
+            // Tracking failed, skip the store.
+            break;
+        }
+
+        const u32 size = GetMemorySize(type);
+        // Number of 32-bit registers holding the value.
+        const u32 count = Common::AlignUp(size, 32) / 32;
+        for (u32 i = 0; i < count; ++i) {
+            const Node it_offset = Immediate(i * 4);
+            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
+            const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+            Node value = GetRegister(instr.gpr0.Value() + i);
+
+            if (IsUnaligned(type)) {
+                // Merge the narrow value into the word currently stored at that address.
+                const u32 mask = GetUnalignedMask(type);
+                value = InsertUnaligned(gmem, move(value), real_address, mask, size);
+            }
+
+            bb.push_back(Operation(OperationCode::Assign, gmem, value));
+        }
+        break;
+    }
+    case OpCode::Id::RED: {
+        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+                             instr.red.type.Value());
+        const auto [real_address, base_address, descriptor] =
+            TrackGlobalMemory(bb, instr, true, true);
+        if (!real_address || !base_address) {
+            // Tracking failed, skip atomic.
+            break;
+        }
+        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+        Node value = GetRegister(instr.gpr0);
+        // The atomic is pushed as a statement; its result is not written to any register.
+        bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
+        break;
+    }
+    case OpCode::Id::ATOM: {
+        UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
+                                 instr.atom.operation == AtomicOp::Dec ||
+                                 instr.atom.operation == AtomicOp::SafeAdd,
+                             "operation={}", instr.atom.operation.Value());
+        UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
+                                 instr.atom.type == GlobalAtomicType::U64 ||
+                                 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
+                                 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
+                             "type={}", instr.atom.type.Value());
+
+        const auto [real_address, base_address, descriptor] =
+            TrackGlobalMemory(bb, instr, true, true);
+        if (!real_address || !base_address) {
+            // Tracking failed, skip atomic.
+            break;
+        }
+
+        const bool is_signed =
+            instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
+        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+        // Unlike RED, the result of the atomic operation is written to gpr0.
+        SetRegister(bb, instr.gpr0,
+                    SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
+                                    GetRegister(instr.gpr20)));
+        break;
+    }
+    case OpCode::Id::ATOMS: {
+        UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
+                                 instr.atoms.operation == AtomicOp::Dec,
+                             "operation={}", instr.atoms.operation.Value());
+        UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
+                                 instr.atoms.type == AtomicType::U64,
+                             "type={}", instr.atoms.type.Value());
+        const bool is_signed =
+            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
+        // Shared memory address = gpr8 + immediate offset.
+        const s32 offset = instr.atoms.GetImmediateOffset();
+        Node address = GetRegister(instr.gpr8);
+        address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
+        SetRegister(bb, instr.gpr0,
+                    SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
+                                    GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
+        break;
+    }
+    case OpCode::Id::AL2P: {
+        // Ignore al2p.direction since we don't care about it.
+
+        // Calculate emulation fake physical address.
+        const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
+        const Node reg{GetRegister(instr.gpr8)};
+        const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
+
+        // Set the fake address to target register.
+        SetRegister(bb, instr.gpr0, fake_address);
+
+        // Signal the shader IR to declare all possible attributes and varyings
+        uses_physical_attributes = true;
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+/// Tracks the constant buffer entry that the address register of a global memory instruction was
+/// loaded from, and records how that global memory region is used.
+/// @param bb       Basic block that receives a comment node describing the base address.
+/// @param instr    Instruction providing the address register and the immediate offset.
+/// @param is_read  Marks the tracked region as read.
+/// @param is_write Marks the tracked region as written.
+/// @return Tuple of (address register + immediate offset, tracked base address node, region
+///         descriptor). When tracking fails both nodes are null and the descriptor is
+///         default-constructed.
+std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
+                                                                     Instruction instr,
+                                                                     bool is_read, bool is_write) {
+    const Node address_reg = GetRegister(instr.gmem.gpr);
+    const u32 imm_offset = static_cast<u32>(instr.gmem.offset);
+
+    const auto [base_address, index, offset] =
+        TrackCbuf(address_reg, global_code, static_cast<s64>(global_code.size()));
+    ASSERT_OR_EXECUTE_MSG(
+        base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+        "Global memory tracking failed");
+
+    bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
+
+    // Register the region (if it is new) and accumulate its read/write usage flags.
+    const GlobalMemoryBase descriptor{index, offset};
+    auto& region_usage = used_global_memory.try_emplace(descriptor).first->second;
+    region_usage.is_written |= is_write;
+    region_usage.is_read |= is_read;
+
+    const Node real_address =
+        Operation(OperationCode::UAdd, NO_PRECISE, Immediate(imm_offset), address_reg);
+
+    return {real_address, base_address, descriptor};
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -0,0 +1,321 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using std::move;
+using Tegra::Shader::ConditionCode;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::IpaInterpMode;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::PixelImap;
+using Tegra::Shader::Register;
+using Tegra::Shader::SystemVariable;
+
+using Index = Tegra::Shader::Attribute::Index;
+
+/// Decodes the miscellaneous instruction located at pc and appends the generated IR nodes to bb.
+/// Handled opcodes: NOP, EXIT, KIL, S2R, BRA, BRX, SSY, PBK, SYNC, BRK, IPA, OUT_R, ISBERD, BAR,
+/// MEMBAR and DEPBAR. Any other opcode is reported as unimplemented.
+/// @param bb Basic block receiving the generated nodes.
+/// @param pc Program counter of the instruction to decode.
+/// @return pc, or MAX_PROGRAM_LENGTH - 1 after an EXIT whose predicate index is
+///         Pred::UnusedIndex.
+u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::NOP: {
+        UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
+        UNIMPLEMENTED_IF(instr.nop.trigger != 0);
+        // With the previous preconditions, this instruction is a no-operation.
+        break;
+    }
+    case OpCode::Id::EXIT: {
+        const ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
+
+        switch (instr.flow.cond) {
+        case Tegra::Shader::FlowCondition::Always:
+            bb.push_back(Operation(OperationCode::Exit));
+            if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
+                // If this is an unconditional exit then just end processing here,
+                // otherwise we have to account for the possibility of the condition
+                // not being met, so continue processing the next instruction.
+                pc = MAX_PROGRAM_LENGTH - 1;
+            }
+            break;
+
+        case Tegra::Shader::FlowCondition::Fcsm_Tr:
+            // TODO(bunnei): What is this used for? If we assume this condition is not
+            // satisfied, dual vertex shaders in Farming Simulator make more sense
+            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
+            break;
+
+        default:
+            UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
+        }
+        break;
+    }
+    case OpCode::Id::KIL: {
+        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
+
+        const ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
+
+        bb.push_back(Operation(OperationCode::Discard));
+        break;
+    }
+    case OpCode::Id::S2R: {
+        // Build the node for the requested system value; it is written to gpr0 below.
+        const Node value = [this, instr] {
+            switch (instr.sys20) {
+            case SystemVariable::LaneId:
+                return Operation(OperationCode::ThreadId);
+            case SystemVariable::InvocationId:
+                return Operation(OperationCode::InvocationId);
+            case SystemVariable::Ydirection:
+                return Operation(OperationCode::YNegate);
+            case SystemVariable::InvocationInfo:
+                LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
+                return Immediate(0x00ff'0000U);
+            case SystemVariable::WscaleFactorXY:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorZ:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
+                return Immediate(0U);
+            case SystemVariable::Tid: {
+                // Pack the three local invocation ids into one word:
+                // X at bit 0 (9 bits), Y at bit 16 (9 bits), Z at bit 26 (5 bits).
+                Node val = Immediate(0);
+                val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
+                val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
+                val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
+                return val;
+            }
+            case SystemVariable::TidX:
+                return Operation(OperationCode::LocalInvocationIdX);
+            case SystemVariable::TidY:
+                return Operation(OperationCode::LocalInvocationIdY);
+            case SystemVariable::TidZ:
+                return Operation(OperationCode::LocalInvocationIdZ);
+            case SystemVariable::CtaIdX:
+                return Operation(OperationCode::WorkGroupIdX);
+            case SystemVariable::CtaIdY:
+                return Operation(OperationCode::WorkGroupIdY);
+            case SystemVariable::CtaIdZ:
+                return Operation(OperationCode::WorkGroupIdZ);
+            case SystemVariable::EqMask:
+            case SystemVariable::LtMask:
+            case SystemVariable::LeMask:
+            case SystemVariable::GtMask:
+            case SystemVariable::GeMask:
+                // Every thread mask query flags the shader as using warp operations.
+                uses_warps = true;
+                switch (instr.sys20) {
+                case SystemVariable::EqMask:
+                    return Operation(OperationCode::ThreadEqMask);
+                case SystemVariable::LtMask:
+                    return Operation(OperationCode::ThreadLtMask);
+                case SystemVariable::LeMask:
+                    return Operation(OperationCode::ThreadLeMask);
+                case SystemVariable::GtMask:
+                    return Operation(OperationCode::ThreadGtMask);
+                case SystemVariable::GeMask:
+                    return Operation(OperationCode::ThreadGeMask);
+                default:
+                    UNREACHABLE();
+                    return Immediate(0u);
+                }
+            default:
+                UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
+                return Immediate(0u);
+            }
+        }();
+        SetRegister(bb, instr.gpr0, value);
+
+        break;
+    }
+    case OpCode::Id::BRA: {
+        Node branch;
+        if (instr.bra.constant_buffer == 0) {
+            // Direct branch: the target is relative to the current pc.
+            const u32 target = pc + instr.bra.GetBranchTarget();
+            branch = Operation(OperationCode::Branch, Immediate(target));
+        } else {
+            // Indirect branch: (constant buffer value >> 3) + pc + 1.
+            const u32 target = pc + 1;
+            const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
+            const Node operand =
+                Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
+            branch = Operation(OperationCode::BranchIndirect, operand);
+        }
+
+        // Any condition code other than T makes the branch conditional.
+        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
+        if (cc != Tegra::Shader::ConditionCode::T) {
+            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
+        } else {
+            bb.push_back(branch);
+        }
+        break;
+    }
+    case OpCode::Id::BRX: {
+        Node operand;
+        if (instr.brx.constant_buffer != 0) {
+            // (constant buffer value indexed by gpr8, >> 3) + pc + 1.
+            const s32 target = pc + 1;
+            const Node index = GetRegister(instr.gpr8);
+            const Node op_a =
+                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
+            operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
+        } else {
+            // (gpr8 >> 3) + pc + branch extend.
+            const s32 target = pc + instr.brx.GetBranchExtend();
+            const Node op_a = GetRegister(instr.gpr8);
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
+            operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
+        }
+        const Node branch = Operation(OperationCode::BranchIndirect, operand);
+
+        // Any condition code other than T makes the branch conditional.
+        const ConditionCode cc = instr.flow_condition_code;
+        if (cc != ConditionCode::T) {
+            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
+        } else {
+            bb.push_back(branch);
+        }
+        break;
+    }
+    case OpCode::Id::SSY: {
+        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                             "Constant buffer flow is not supported");
+
+        // Nothing is emitted when the flow stack is disabled.
+        if (disable_flow_stack) {
+            break;
+        }
+
+        // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
+        const u32 target = pc + instr.bra.GetBranchTarget();
+        bb.push_back(
+            Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
+        break;
+    }
+    case OpCode::Id::PBK: {
+        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                             "Constant buffer PBK is not supported");
+
+        // Nothing is emitted when the flow stack is disabled.
+        if (disable_flow_stack) {
+            break;
+        }
+
+        // PBK pushes to a stack the address where BRK will jump to.
+        const u32 target = pc + instr.bra.GetBranchTarget();
+        bb.push_back(
+            Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
+        break;
+    }
+    case OpCode::Id::SYNC: {
+        const ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
+
+        // No flow stack pop is emitted when the `decompiled` flag is set.
+        if (decompiled) {
+            break;
+        }
+
+        // The SYNC opcode jumps to the address previously set by the SSY opcode
+        bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
+        break;
+    }
+    case OpCode::Id::BRK: {
+        const ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
+        // No flow stack pop is emitted when the `decompiled` flag is set.
+        if (decompiled) {
+            break;
+        }
+
+        // The BRK opcode jumps to the address previously set by the PBK opcode
+        bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
+        break;
+    }
+    case OpCode::Id::IPA: {
+        // NOTE(review): 0xff presumably matches Register::ZeroIndex - confirm.
+        const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
+        const auto attribute = instr.attribute.fmt28;
+        const Index index = attribute.index;
+
+        Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
+                                 : GetInputAttribute(index, attribute.element);
+
+        // Code taken from Ryujinx.
+        // Generic attributes flagged as perspective are multiplied by the position's W element.
+        if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
+            const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
+            if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
+                Node position_w = GetInputAttribute(Index::Position, 3);
+                value = Operation(OperationCode::FMul, move(value), move(position_w));
+            }
+        }
+
+        if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
+            value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
+        }
+
+        value = GetSaturatedFloat(move(value), instr.ipa.saturate);
+
+        SetRegister(bb, instr.gpr0, move(value));
+        break;
+    }
+    case OpCode::Id::OUT_R: {
+        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
+                             "Stream buffer is not supported");
+
+        if (instr.out.emit) {
+            // gpr0 is used to store the next address and gpr8 contains the address to emit.
+            // Hardware uses pointers here but we just ignore it
+            bb.push_back(Operation(OperationCode::EmitVertex));
+            SetRegister(bb, instr.gpr0, Immediate(0));
+        }
+        if (instr.out.cut) {
+            bb.push_back(Operation(OperationCode::EndPrimitive));
+        }
+        break;
+    }
+    case OpCode::Id::ISBERD: {
+        UNIMPLEMENTED_IF(instr.isberd.o != 0);
+        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
+        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
+        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
+        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
+        // Incomplete implementation: the source register is copied to the destination as-is.
+        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
+        break;
+    }
+    case OpCode::Id::BAR: {
+        // Only this exact encoding is accepted; anything else is reported as unimplemented.
+        UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
+        bb.push_back(Operation(OperationCode::Barrier));
+        break;
+    }
+    case OpCode::Id::MEMBAR: {
+        UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
+        // Unknown barrier types are reported and fall back to a global memory barrier.
+        const OperationCode type = [instr] {
+            switch (instr.membar.type) {
+            case Tegra::Shader::MembarType::CTA:
+                return OperationCode::MemoryBarrierGroup;
+            case Tegra::Shader::MembarType::GL:
+                return OperationCode::MemoryBarrierGlobal;
+            default:
+                UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
+                return OperationCode::MemoryBarrierGlobal;
+            }
+        }();
+        bb.push_back(Operation(type));
+        break;
+    }
+    case OpCode::Id::DEPBAR: {
+        LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -0,0 +1,68 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+
+// Decodes the instructions that write predicates from other predicates (PSETP) or from a
+// condition code (CSETP) and appends the generated predicate assignments to bb.
+// bb: node block receiving the generated code.
+// pc: index of the instruction inside program_code.
+// Returns pc unchanged.
+u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    const auto opcode_id = opcode->get().GetId();
+
+    if (opcode_id == OpCode::Id::PSETP) {
+        const Node lhs = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+        const Node rhs = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+
+        // The constant predicate is not a valid primary destination.
+        ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+        const Node extra = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+
+        // NOTE(review): the same combiner is used for both steps here, whereas PSET combines its
+        // first two predicates with a separate 'cond' field - confirm this is intended.
+        const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
+        const Node combined = Operation(combiner, lhs, rhs);
+
+        // Primary destination receives (lhs OP rhs) OP extra
+        SetPredicate(bb, instr.psetp.pred3, Operation(combiner, combined, extra));
+
+        // Secondary destination, when enabled, receives !(lhs OP rhs) OP extra
+        const bool has_secondary = instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
+        if (has_secondary) {
+            const Node negated = Operation(OperationCode::LogicalNegate, combined);
+            SetPredicate(bb, instr.psetp.pred0, Operation(combiner, negated, extra));
+        }
+    } else if (opcode_id == OpCode::Id::CSETP) {
+        const Node extra = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
+        const Node cc = GetConditionCode(instr.csetp.cc);
+        const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
+
+        // Both destinations are optional: each one is only written when it is not the unused
+        // predicate index.
+        const bool has_primary = instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex);
+        if (has_primary) {
+            SetPredicate(bb, instr.csetp.pred3, Operation(combiner, cc, extra));
+        }
+        const bool has_secondary = instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
+        if (has_secondary) {
+            // The secondary destination uses the negated condition code.
+            const Node negated_cc = Operation(OperationCode::LogicalNegate, cc);
+            SetPredicate(bb, instr.csetp.pred0, Operation(combiner, negated_cc, extra));
+        }
+    } else {
+        UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -0,0 +1,46 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+// Decodes PSET: combines three predicates into a single boolean and stores it in a register,
+// either as a float (1.0f / 0.0f) or as an integer mask (0xffffffff / 0).
+// bb: node block receiving the generated code.
+// pc: index of the instruction inside program_code.
+// Returns pc unchanged.
+u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+
+    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                         "Condition codes generation in PSET is not implemented");
+
+    // First stage: pred12 COND pred29
+    const Node lhs = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+    const Node rhs = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+    const Node inner = Operation(GetPredicateCombiner(instr.pset.cond), lhs, rhs);
+
+    // Second stage: (first stage) OP pred39
+    const Node extra = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+    const OperationCode outer_combiner = GetPredicateCombiner(instr.pset.op);
+    const Node condition = Operation(outer_combiner, inner, extra);
+
+    // The 'bf' bit selects between a float result and an integer mask result.
+    const bool as_float = instr.pset.bf != 0;
+    Node on_true;
+    Node on_false;
+    if (as_float) {
+        on_true = Immediate(1.0f);
+        on_false = Immediate(0.0f);
+    } else {
+        on_true = Immediate(0xffffffff);
+        on_false = Immediate(0);
+    }
+    const Node value = Operation(OperationCode::Select, PRECISE, condition, on_true, on_false);
+
+    if (as_float) {
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+    } else {
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+    }
+    SetRegister(bb, instr.gpr0, value);
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -0,0 +1,86 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using std::move;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+namespace {
+constexpr u64 NUM_CONDITION_CODES = 4; // Entries visited by P2R/R2P when the mode is Cc
+constexpr u64 NUM_PREDICATES = 7; // Entries visited by P2R/R2P when the mode is not Cc
+} // namespace
+
+// Decodes R2P_IMM (register to predicates / condition codes) and P2R_IMM (predicates / condition
+// codes to register) and appends the generated code to bb.
+// bb: node block receiving the generated code.
+// pc: index of the instruction inside program_code.
+// Returns pc unchanged.
+u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    // Mask selecting which entries take part in the transfer. Only the immediate encodings are
+    // supported.
+    Node apply_mask = [this, opcode, instr] {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::R2P_IMM:
+        case OpCode::Id::P2R_IMM:
+            return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }();
+
+    // Bit offset of the byte of the register that is read or written.
+    const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
+
+    // In Cc mode the entries are internal flags, otherwise they are predicates.
+    const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
+    const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
+    const auto get_entry = [this, cc](u64 entry) {
+        return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
+    };
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::R2P_IMM: {
+        Node mask = GetRegister(instr.gpr8);
+
+        for (u64 entry = 0; entry < num_entries; ++entry) {
+            const u32 shift = static_cast<u32>(entry);
+
+            // The entry is only written when its bit is set in the immediate mask.
+            Node apply = BitfieldExtract(apply_mask, shift, 1);
+            Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
+
+            // New value of the entry: the matching bit of the selected byte of the register.
+            Node compare = BitfieldExtract(mask, offset + shift, 1);
+            Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
+
+            Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
+            bb.push_back(Conditional(condition, {move(code)}));
+        }
+        break;
+    }
+    case OpCode::Id::P2R_IMM: {
+        // Pack every entry into one bit of value, bit N holding entry N.
+        Node value = Immediate(0);
+        for (u64 entry = 0; entry < num_entries; ++entry) {
+            Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
+                                 Immediate(0));
+            value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
+        }
+        // Keep only the masked entries and insert them into the selected byte of gpr8.
+        value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
+        value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
+
+        SetRegister(bb, instr.gpr0, move(value));
+        break;
+    }
+    default:
+        // The instructions of this family are P2R and R2P.
+        UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName());
+        break;
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -0,0 +1,153 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using std::move;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::ShfType;
+using Tegra::Shader::ShfXmode;
+
+namespace {
+
+// Builds a boolean node that is true when shift equals 32, i.e. a full 32-bit word.
+Node IsFull(Node shift) {
+    Node full_width = Immediate(32);
+    return Operation(OperationCode::LogicalIEqual, move(shift), move(full_width));
+}
+
+// Applies the shift operation 'opcode' to value, yielding zero instead when the shift amount is
+// exactly 32.
+Node Shift(OperationCode opcode, Node value, Node shift) {
+    Node raw_result = Operation(opcode, move(value), shift);
+    Node is_full = IsFull(move(shift));
+    return Operation(OperationCode::Select, move(is_full), Immediate(0), move(raw_result));
+}
+
+// Clamps a signed shift amount to the range [0, size].
+Node ClampShift(Node shift, s32 size = 32) {
+    Node non_negative = Operation(OperationCode::IMax, move(shift), Immediate(0));
+    return Operation(OperationCode::IMin, move(non_negative), Immediate(size));
+}
+
+// Wraps a shift amount by masking it with size - 1.
+Node WrapShift(Node shift, s32 size = 32) {
+    const s32 wrap_mask = size - 1;
+    return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(wrap_mask));
+}
+
+// Builds the node for a funnel shift right: the pair high:low is shifted right by 'shift' and
+// the low word of the result is produced.
+// low, high: the two 32-bit halves of the value being shifted.
+// shift: shift amount, already wrapped or clamped by the caller.
+// low_shift: 32 - shift, used to bring the bits of high that cross into the low word.
+// type: Bits32 limits the shift to at most 32; S64 makes shifts of 32 or more arithmetic.
+Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
+    // These values are used when the shift value is less than 32.
+    // Shift() yields zero for a shift of exactly 32, so with shift == 0 (low_shift == 32) the
+    // contribution of high vanishes and 'less' is simply low.
+    Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
+    Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
+    Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
+
+    if (type == ShfType::Bits32) {
+        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
+        return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
+    }
+
+    // And these when it's greater than or equal to 32
+    const bool is_signed = type == ShfType::S64;
+    const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
+    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
+    Node greater = Shift(opcode, high, move(reduced));
+
+    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
+    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
+
+    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
+    // A right shift by zero leaves the low word untouched; this matches what the Bits32 path
+    // above produces for a zero shift.
+    return Operation(OperationCode::Select, move(is_zero), move(low), move(value));
+}
+
+// Builds the node for a funnel shift left: the pair high:low is shifted left by 'shift' and the
+// high word of the result is produced.
+// low, high: the two 32-bit halves of the value being shifted.
+// shift: shift amount, already wrapped or clamped by the caller.
+// low_shift: 32 - shift, used to bring the bits of low that cross into the high word.
+// type: Bits32 limits the shift to at most 32.
+Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
+    // These values are used when the shift value is less than 32.
+    // With shift == 0 the low word would be shifted right by 32, which is not a defined shift
+    // amount for a 32-bit value; Shift() yields zero in that case so 'less' is simply high.
+    Node less_low = Shift(OperationCode::ILogicalShiftRight, low, low_shift);
+    Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
+    Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
+
+    if (type == ShfType::Bits32) {
+        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
+        return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
+    }
+
+    // And these when it's greater than or equal to 32
+    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
+    Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
+
+    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
+    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
+
+    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
+    // A left shift by zero leaves the high word untouched.
+    return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
+}
+
+} // Anonymous namespace
+
+// Decodes the shift instructions SHR, SHL and SHF (funnel shift) and appends the generated code
+// to bb.
+// bb: node block receiving the generated code.
+// pc: index of the instruction inside program_code.
+// Returns pc unchanged.
+u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    const auto opid = opcode->get().GetId();
+
+    Node source = GetRegister(instr.gpr8);
+
+    // The second operand comes from an immediate, a register or a constant buffer.
+    Node amount;
+    if (instr.is_b_imm) {
+        amount = Immediate(instr.alu.GetSignedImm20_20());
+    } else if (instr.is_b_gpr) {
+        amount = GetRegister(instr.gpr20);
+    } else {
+        amount = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+    }
+
+    switch (opid) {
+    case OpCode::Id::SHR_C:
+    case OpCode::Id::SHR_R:
+    case OpCode::Id::SHR_IMM: {
+        if (instr.shr.wrap) {
+            amount = WrapShift(move(amount));
+        } else {
+            amount = ClampShift(move(amount));
+        }
+
+        Node result = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
+                                      move(source), move(amount));
+        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, move(result));
+        break;
+    }
+    case OpCode::Id::SHL_C:
+    case OpCode::Id::SHL_R:
+    case OpCode::Id::SHL_IMM: {
+        Node result = Operation(OperationCode::ILogicalShiftLeft, source, amount);
+        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, move(result));
+        break;
+    }
+    case OpCode::Id::SHF_RIGHT_R:
+    case OpCode::Id::SHF_RIGHT_IMM:
+    case OpCode::Id::SHF_LEFT_R:
+    case OpCode::Id::SHF_LEFT_IMM: {
+        UNIMPLEMENTED_IF(instr.generates_cc);
+        UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
+                             instr.shf.xmode.Value());
+
+        // The funnel shift carries its own immediate field.
+        if (instr.is_b_imm) {
+            amount = Immediate(static_cast<u32>(instr.shf.immediate));
+        }
+        const bool is_32_bits = instr.shf.type == ShfType::Bits32;
+        const s32 size = is_32_bits ? 32 : 64;
+        Node shift;
+        if (instr.shf.wrap) {
+            shift = WrapShift(move(amount), size);
+        } else {
+            shift = ClampShift(move(amount), size);
+        }
+
+        // low_shift = 32 - shift
+        Node negated_shift = Operation(OperationCode::INegate, shift);
+        Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
+
+        const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
+        const auto funnel_shift = is_right ? ShiftRight : ShiftLeft;
+        Node high = GetRegister(instr.gpr39);
+        Node result =
+            funnel_shift(move(source), move(high), move(shift), move(low_shift), instr.shf.type);
+
+        SetRegister(bb, instr.gpr0, move(result));
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,928 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <vector>
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/registry.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
+using Tegra::Shader::TextureMiscMode;
+using Tegra::Shader::TextureProcessMode;
+using Tegra::Shader::TextureType;
+
+// Returns how many coordinates a texture of the given type takes: 1 for 1D, 2 for 2D and 3 for
+// 3D and cube textures. Any other type is reported as unimplemented and yields 0.
+static std::size_t GetCoordCount(TextureType texture_type) {
+    if (texture_type == TextureType::Texture1D) {
+        return 1;
+    }
+    if (texture_type == TextureType::Texture2D) {
+        return 2;
+    }
+    if (texture_type == TextureType::Texture3D || texture_type == TextureType::TextureCube) {
+        return 3;
+    }
+    UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
+    return 0;
+}
+
+// Decodes the texture instructions (TEX, TEXS, TLD4, TLD4S, TXD, TXQ, TMML, TLD, TLDS and their
+// bindless variants) and appends the generated code to bb.
+// bb: node block receiving the generated code.
+// pc: index of the instruction inside program_code.
+// Returns pc unchanged.
+u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Set by the *_B cases before falling through to the shared implementation.
+    bool is_bindless = false;
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::TEX: {
+        const TextureType texture_type{instr.tex.texture_type};
+        const bool is_array = instr.tex.array != 0;
+        const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.tex.GetTextureProcessMode();
+        WriteTexInstructionFloat(
+            bb, instr,
+            GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
+        break;
+    }
+    case OpCode::Id::TEX_B: {
+        // TEX_B has its own encoding; every flag has to be read through the tex_b view.
+        UNIMPLEMENTED_IF_MSG(instr.tex_b.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+
+        const TextureType texture_type{instr.tex_b.texture_type};
+        const bool is_array = instr.tex_b.array != 0;
+        const bool is_aoffi = instr.tex_b.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.tex_b.GetTextureProcessMode();
+        // The bindless handle is passed through gpr20.
+        WriteTexInstructionFloat(bb, instr,
+                                 GetTexCode(instr, texture_type, process_mode, depth_compare,
+                                            is_array, is_aoffi, {instr.gpr20}));
+        break;
+    }
+    case OpCode::Id::TEXS: {
+        const TextureType texture_type{instr.texs.GetTextureType()};
+        const bool is_array{instr.texs.IsArrayTexture()};
+        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.texs.GetTextureProcessMode();
+
+        const Node4 components =
+            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
+
+        // The result is written either as 32-bit floats or as packed half floats.
+        if (instr.texs.fp32_flag) {
+            WriteTexsInstructionFloat(bb, instr, components);
+        } else {
+            WriteTexsInstructionHalfFloat(bb, instr, components);
+        }
+        break;
+    }
+    case OpCode::Id::TLD4_B: {
+        is_bindless = true;
+        [[fallthrough]];
+    }
+    case OpCode::Id::TLD4: {
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
+                             "NDV is not implemented");
+        const auto texture_type = instr.tld4.texture_type.Value();
+        // The misc mode flags are read through the view matching the encoding in use.
+        const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
+                                               : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+        const bool is_array = instr.tld4.array != 0;
+        const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
+                                          : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
+                                        : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
+        WriteTexInstructionFloat(bb, instr,
+                                 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
+                                             is_ptp, is_bindless));
+        break;
+    }
+    case OpCode::Id::TLD4S: {
+        constexpr std::size_t num_coords = 2;
+        const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
+        const Node op_a = GetRegister(instr.gpr8);
+        const Node op_b = GetRegister(instr.gpr20);
+
+        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+        std::vector<Node> coords;
+        std::vector<Node> aoffi;
+        Node depth_compare;
+        if (is_depth_compare) {
+            // Note: TLD4S coordinate encoding works just like TEXS's
+            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
+            coords.push_back(op_a);
+            coords.push_back(op_y);
+            if (is_aoffi) {
+                // With offsets, gpr20 holds the offsets and the next register the reference.
+                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
+                depth_compare = GetRegister(instr.gpr20.Value() + 1);
+            } else {
+                depth_compare = op_b;
+            }
+        } else {
+            // There's no depth compare
+            coords.push_back(op_a);
+            if (is_aoffi) {
+                coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
+            } else {
+                coords.push_back(op_b);
+            }
+        }
+        const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
+
+        SamplerInfo info;
+        info.is_shadow = is_depth_compare;
+        const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
+
+        // One gather operation per destination element.
+        Node4 values;
+        for (u32 element = 0; element < values.size(); ++element) {
+            MetaTexture meta{*sampler, {}, depth_compare, aoffi,   {}, {},
+                             {},       {}, component,     element, {}};
+            values[element] = Operation(OperationCode::TextureGather, meta, coords);
+        }
+
+        if (instr.tld4s.fp16_flag) {
+            WriteTexsInstructionHalfFloat(bb, instr, values, true);
+        } else {
+            WriteTexsInstructionFloat(bb, instr, values, true);
+        }
+        break;
+    }
+    case OpCode::Id::TXD_B:
+        is_bindless = true;
+        [[fallthrough]];
+    case OpCode::Id::TXD: {
+        UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+
+        const bool is_array = instr.txd.is_array != 0;
+        const auto derivate_reg = instr.gpr20.Value();
+        const auto texture_type = instr.txd.texture_type.Value();
+        const auto coord_count = GetCoordCount(texture_type);
+        u64 base_reg = instr.gpr8.Value();
+        Node index_var;
+        SamplerInfo info;
+        info.type = texture_type;
+        info.is_array = is_array;
+        const std::optional<SamplerEntry> sampler =
+            is_bindless ? GetBindlessSampler(base_reg, info, index_var)
+                        : GetSampler(instr.sampler, info);
+        Node4 values;
+        if (!sampler) {
+            // Without a sampler the instruction writes zeroes.
+            std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
+            WriteTexInstructionFloat(bb, instr, values);
+            break;
+        }
+
+        // In the bindless form the first register holds the handle; coordinates follow it.
+        if (is_bindless) {
+            base_reg++;
+        }
+
+        // Each coordinate has two derivatives stored in consecutive registers from gpr20.
+        std::vector<Node> coords;
+        std::vector<Node> derivates;
+        for (std::size_t i = 0; i < coord_count; ++i) {
+            coords.push_back(GetRegister(base_reg + i));
+            const std::size_t derivate = i * 2;
+            derivates.push_back(GetRegister(derivate_reg + derivate));
+            derivates.push_back(GetRegister(derivate_reg + derivate + 1));
+        }
+
+        Node array_node = {};
+        if (is_array) {
+            // The array index is the low 16 bits of the register after the coordinates.
+            const Node info_reg = GetRegister(base_reg + coord_count);
+            array_node = BitfieldExtract(info_reg, 0, 16);
+        }
+
+        for (u32 element = 0; element < values.size(); ++element) {
+            MetaTexture meta{*sampler, array_node, {}, {},      {},       derivates,
+                             {},       {},         {}, element, index_var};
+            values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
+        }
+
+        WriteTexInstructionFloat(bb, instr, values);
+
+        break;
+    }
+    case OpCode::Id::TXQ_B:
+        is_bindless = true;
+        [[fallthrough]];
+    case OpCode::Id::TXQ: {
+        Node index_var;
+        const std::optional<SamplerEntry> sampler =
+            is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
+                        : GetSampler(instr.sampler, {});
+
+        if (!sampler) {
+            // Without a sampler every enabled component is written as zero. The values go
+            // through temporaries before being copied to consecutive destination registers.
+            u32 indexer = 0;
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.txq.IsComponentEnabled(element)) {
+                    continue;
+                }
+                const Node value = Immediate(0);
+                SetTemporary(bb, indexer++, value);
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+            break;
+        }
+
+        u32 indexer = 0;
+        switch (instr.txq.query_type) {
+        case Tegra::Shader::TextureQueryType::Dimension: {
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.txq.IsComponentEnabled(element)) {
+                    continue;
+                }
+                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
+                // In the bindless form gpr8 holds the handle, so the operand is the next one.
+                const Node value =
+                    Operation(OperationCode::TextureQueryDimensions, meta,
+                              GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
+                SetTemporary(bb, indexer++, value);
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
+        }
+        break;
+    }
+    case OpCode::Id::TMML_B:
+        is_bindless = true;
+        [[fallthrough]];
+    case OpCode::Id::TMML: {
+        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+                             "NDV is not implemented");
+
+        const auto texture_type = instr.tmml.texture_type.Value();
+        const bool is_array = instr.tmml.array != 0;
+        SamplerInfo info;
+        info.type = texture_type;
+        info.is_array = is_array;
+        Node index_var;
+        const std::optional<SamplerEntry> sampler =
+            is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
+                        : GetSampler(instr.sampler, info);
+
+        if (!sampler) {
+            // Without a sampler every enabled component is written as zero.
+            u32 indexer = 0;
+            for (u32 element = 0; element < 2; ++element) {
+                if (!instr.tmml.IsComponentEnabled(element)) {
+                    continue;
+                }
+                const Node value = Immediate(0);
+                SetTemporary(bb, indexer++, value);
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+            break;
+        }
+
+        // For array textures the coordinates start one register after gpr8.
+        const u64 base_index = is_array ? 1 : 0;
+        const u64 num_components = [texture_type] {
+            switch (texture_type) {
+            case TextureType::Texture1D:
+                return 1;
+            case TextureType::Texture2D:
+                return 2;
+            case TextureType::TextureCube:
+                return 3;
+            default:
+                UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
+                return 2;
+            }
+        }();
+        // TODO: What's the array component used for?
+
+        std::vector<Node> coords;
+        coords.reserve(num_components);
+        for (u64 component = 0; component < num_components; ++component) {
+            coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
+        }
+
+        u32 indexer = 0;
+        for (u32 element = 0; element < 2; ++element) {
+            if (!instr.tmml.IsComponentEnabled(element)) {
+                continue;
+            }
+            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
+            Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
+            SetTemporary(bb, indexer++, std::move(value));
+        }
+        for (u32 i = 0; i < indexer; ++i) {
+            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+        }
+        break;
+    }
+    case OpCode::Id::TLD: {
+        UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
+
+        WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
+        break;
+    }
+    case OpCode::Id::TLDS: {
+        const TextureType texture_type{instr.tlds.GetTextureType()};
+        const bool is_array{instr.tlds.IsArrayTexture()};
+
+        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
+
+        const Node4 components = GetTldsCode(instr, texture_type, is_array);
+
+        if (instr.tlds.fp32_flag) {
+            WriteTexsInstructionFloat(bb, instr, components);
+        } else {
+            WriteTexsInstructionHalfFloat(bb, instr, components);
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+// Completes the sampler information deduced from an instruction. Fields already present in info
+// are kept; missing ones are taken from the sampler descriptor when one is available, or
+// otherwise default to a non-array, non-shadow, non-buffer 2D texture.
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
+    SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
+    if (info.IsComplete()) {
+        return info;
+    }
+
+    // Assigns only the fields that the instruction left unspecified.
+    const auto fill_missing = [&info](Tegra::Shader::TextureType type, bool is_array,
+                                      bool is_shadow, bool is_buffer) {
+        info.type = info.type.value_or(type);
+        info.is_array = info.is_array.value_or(is_array);
+        info.is_shadow = info.is_shadow.value_or(is_shadow);
+        info.is_buffer = info.is_buffer.value_or(is_buffer);
+    };
+
+    if (sampler) {
+        fill_missing(sampler->texture_type, sampler->is_array != 0, sampler->is_shadow != 0,
+                     sampler->is_buffer != 0);
+    } else {
+        LOG_WARNING(HW_GPU, "Unknown sampler info");
+        fill_missing(Tegra::Shader::TextureType::Texture2D, false, false, false);
+    }
+    return info;
+}
+
+// Returns the entry of used_samplers for a bound sampler, registering a new entry the first time
+// its offset is seen.
+// sampler: sampler field of the instruction; its index is used as the offset.
+// sampler_info: information deduced from the instruction, completed through GetSamplerInfo.
+std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
+                                                 SamplerInfo sampler_info) {
+    const u32 offset = static_cast<u32>(sampler.index.Value());
+    const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
+
+    const auto has_same_offset = [offset](const SamplerEntry& entry) {
+        return entry.offset == offset;
+    };
+    const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), has_same_offset);
+
+    if (it == used_samplers.end()) {
+        // First use of this sampler: append a new, non-bindless mapping.
+        const auto new_index = static_cast<u32>(used_samplers.size());
+        return used_samplers.emplace_back(new_index, offset, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer, false);
+    }
+
+    // Already registered: the existing mapping must agree with what this use expects.
+    ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
+           it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+    return *it;
+}
+
+// Resolves the sampler entry for a texture instruction whose sampler is given by register `reg`.
+//
+// The register is tracked through `global_code` with TrackBindlessSampler. The tracked result is
+// handled as one of three node kinds (BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode).
+// For each kind, an entry already present in `used_samplers` is reused when its location matches;
+// otherwise a new entry is appended and returned.
+//
+// `info` is passed by value and replaced by the result of GetSamplerInfo for the tracked sampler.
+// `index_var` is only written in the ArraySamplerNode case.
+// Returns std::nullopt when tracking yields no base node or none of the three kinds matches.
+std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+                                                         SamplerInfo info, Node& index_var) {
+    const Node sampler_register = GetRegister(reg);
+    const auto [base_node, tracked_sampler_info] =
+        TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
+    if (!base_node) {
+        // Tracking failed: report it and hand back an empty optional.
+        UNREACHABLE();
+        return std::nullopt;
+    }
+
+    // Case 1: the sampler is identified by a (buffer, offset) pair.
+    if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+        const u32 buffer = sampler_info->index;
+        const u32 offset = sampler_info->offset;
+        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
+
+        // If this sampler has already been used, return the existing mapping.
+        const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
+                                     [buffer, offset](const SamplerEntry& entry) {
+                                         return entry.buffer == buffer && entry.offset == offset;
+                                     });
+        if (it != used_samplers.end()) {
+            // NOTE(review): unlike the other two cases, is_buffer is not compared here - confirm
+            // whether that is intentional.
+            ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
+                   it->is_shadow == info.is_shadow);
+            return *it;
+        }
+
+        // Otherwise create a new mapping for this sampler
+        const auto next_index = static_cast<u32>(used_samplers.size());
+        return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer, false);
+    }
+    // Case 2: the sampler is identified by two (buffer, offset) pairs.
+    if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
+        const std::pair indices = sampler_info->indices;
+        const std::pair offsets = sampler_info->offsets;
+        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
+
+        // Try to use an already created sampler if it exists
+        const auto it =
+            std::find_if(used_samplers.begin(), used_samplers.end(),
+                         [indices, offsets](const SamplerEntry& entry) {
+                             return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+                                    indices == std::pair{entry.buffer, entry.secondary_buffer};
+                         });
+        if (it != used_samplers.end()) {
+            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
+                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+            return *it;
+        }
+
+        // Otherwise create a new mapping for this sampler
+        const u32 next_index = static_cast<u32>(used_samplers.size());
+        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer);
+    }
+    // Case 3: the sampler is selected at runtime from a base offset plus a custom variable.
+    if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+        // Presumably converts a byte offset into a 32-bit word index - TODO confirm.
+        const u32 base_offset = sampler_info->base_offset / 4;
+        // Hand the dynamic index back to the caller.
+        index_var = GetCustomVariable(sampler_info->bindless_var);
+        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
+
+        // If this sampler has already been used, return the existing mapping.
+        const auto it = std::find_if(
+            used_samplers.begin(), used_samplers.end(),
+            [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
+        if (it != used_samplers.end()) {
+            ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
+                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
+                   it->is_indexed);
+            return *it;
+        }
+
+        // Only flagged when a new indexed entry is created.
+        uses_indexed_samplers = true;
+        // Otherwise create a new mapping for this sampler
+        const auto next_index = static_cast<u32>(used_samplers.size());
+        return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer, true);
+    }
+    // The tracked node is none of the kinds handled above.
+    return std::nullopt;
+}
+
+// Writes the enabled components of a TEX result to consecutive registers starting at gpr0.
+//
+// Every enabled component is first staged in a temporary (packed, in component order) and only
+// then copied to its destination register.
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
+    // Stage the enabled components into temporaries 0..written-1.
+    u32 written = 0;
+    for (u32 channel = 0; channel < 4; ++channel) {
+        if (instr.tex.IsComponentEnabled(channel)) {
+            SetTemporary(bb, written, components[channel]);
+            ++written;
+        }
+    }
+
+    // Copy the staged values into gpr0, gpr0 + 1, ...
+    u32 slot = 0;
+    while (slot < written) {
+        SetRegister(bb, instr.gpr0.Value() + slot, GetTemporary(slot));
+        ++slot;
+    }
+}
+
+// Writes a TEXS result to its destination registers.
+//
+// TEXS has two destination registers and a swizzle: the first two selected components go to
+// gpr0 and gpr0 + 1, the remaining ones to gpr28 and gpr28 + 1. When `ignore_mask` is set every
+// component is written regardless of the instruction's component mask.
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
+                                         bool ignore_mask) {
+    // Stage the selected components into temporaries, packed in component order.
+    u32 written = 0;
+    for (u32 channel = 0; channel < 4; ++channel) {
+        const bool enabled = instr.texs.IsComponentEnabled(channel);
+        if (enabled || ignore_mask) {
+            SetTemporary(bb, written, components[channel]);
+            ++written;
+        }
+    }
+
+    for (u32 slot = 0; slot < written; ++slot) {
+        // Position inside the destination register pair.
+        const u32 lane = slot % 2;
+        if (slot >= 2) {
+            // The third and fourth values need the second destination register pair.
+            ASSERT(instr.texs.HasTwoDestinations());
+            SetRegister(bb, instr.gpr28.Value() + lane, GetTemporary(slot));
+            continue;
+        }
+        SetRegister(bb, instr.gpr0.Value() + lane, GetTemporary(slot));
+    }
+}
+
+// Writes a TEXS.F16 result to its destination registers.
+//
+// TEXS.F16 destination registers hold the values packed in pairs (just like any half float
+// instruction): the first pair goes to gpr0 and, when more than two components are selected, the
+// second pair goes to gpr28. Unselected trailing slots are filled with a zero immediate. Nothing
+// is written when no component is selected.
+void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
+                                             const Node4& components, bool ignore_mask) {
+    Node4 packed_inputs;
+    u32 count = 0;
+    for (u32 channel = 0; channel < 4; ++channel) {
+        const bool enabled = instr.texs.IsComponentEnabled(channel);
+        if (enabled || ignore_mask) {
+            packed_inputs[count] = components[channel];
+            ++count;
+        }
+    }
+    if (count == 0) {
+        return;
+    }
+
+    // Pad the unused tail so both HPack2 operations always receive two operands.
+    for (std::size_t pad = count; pad < packed_inputs.size(); ++pad) {
+        packed_inputs[pad] = Immediate(0);
+    }
+
+    const Node low_pair = Operation(OperationCode::HPack2, packed_inputs[0], packed_inputs[1]);
+    if (count > 2) {
+        // Two destinations: stage both packed pairs before touching the real registers.
+        SetTemporary(bb, 0, low_pair);
+        SetTemporary(bb, 1, Operation(OperationCode::HPack2, packed_inputs[2], packed_inputs[3]));
+
+        SetRegister(bb, instr.gpr0, GetTemporary(0));
+        SetRegister(bb, instr.gpr28, GetTemporary(1));
+    } else {
+        SetRegister(bb, instr.gpr0, low_pair);
+    }
+}
+
+// Builds the four per-element texture sampling nodes shared by the TEX and TEXS decoders.
+//
+// A non-null `array` / `depth_compare` node marks the sampler as array / shadow. The sampler is
+// resolved through GetBindlessSampler when `bindless_reg` holds a value and through GetSampler
+// otherwise; if none is resolved, four zero immediates are returned. `bias_offset` is added to
+// gpr20 to locate the lod or bias register for the LB and LL process modes.
+Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
+                               TextureProcessMode process_mode, std::vector<Node> coords,
+                               Node array, Node depth_compare, u32 bias_offset,
+                               std::vector<Node> aoffi,
+                               std::optional<Tegra::Shader::Register> bindless_reg) {
+    const bool has_array = array != nullptr;
+    const bool has_depth_compare = depth_compare != nullptr;
+
+    ASSERT_MSG(texture_type != TextureType::Texture3D || !has_array || !has_depth_compare,
+               "Illegal texture type");
+
+    SamplerInfo info;
+    info.type = texture_type;
+    info.is_array = has_array;
+    info.is_shadow = has_depth_compare;
+    info.is_buffer = false;
+
+    Node index_var;
+    const std::optional<SamplerEntry> sampler = [&]() -> std::optional<SamplerEntry> {
+        if (bindless_reg.has_value()) {
+            return GetBindlessSampler(*bindless_reg, info, index_var);
+        }
+        return GetSampler(instr.sampler, info);
+    }();
+    if (!sampler) {
+        return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
+    }
+
+    // LZ, LL and LLA select the explicit-lod operation; every other mode uses the plain one.
+    OperationCode opcode = OperationCode::Texture;
+    if (process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL ||
+        process_mode == TextureProcessMode::LLA) {
+        opcode = OperationCode::TextureLod;
+    }
+
+    Node bias;
+    Node lod;
+    switch (process_mode) {
+    case TextureProcessMode::None:
+        break;
+    case TextureProcessMode::LZ:
+        lod = Immediate(0.0f);
+        break;
+    case TextureProcessMode::LB:
+        // If present, lod or bias are always stored in the register indexed by the gpr20 field with
+        // an offset depending on the usage of the other registers.
+        bias = GetRegister(instr.gpr20.Value() + bias_offset);
+        break;
+    case TextureProcessMode::LL:
+        lod = GetRegister(instr.gpr20.Value() + bias_offset);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
+        break;
+    }
+
+    // One operation node per result element; they differ only in the element index of the meta.
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        MetaTexture meta{*sampler, array, depth_compare, aoffi, {},      {},
+                         bias,     lod,   {},            element, index_var};
+        values[element] = Operation(opcode, meta, coords);
+    }
+    return values;
+}
+
+// Gathers the operands of a TEX-style instruction and forwards them to GetTextureCode.
+//
+// Coordinates are read from consecutive registers starting at gpr8 (gpr8 + 1 when arrays are
+// used, gpr8 then holding the array index). The AOFFI and depth compare operands are read, in
+// that order, from the gpr20-based registers that follow the slots skipped for a bindless
+// register and for the lod/bias value.
+Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
+                           TextureProcessMode process_mode, bool depth_compare, bool is_array,
+                           bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
+    const bool uses_lod_or_bias =
+        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);
+    const bool has_bindless_reg = bindless_reg.has_value();
+
+    // Offset from gpr20 at which GetTextureCode looks up the lod/bias register.
+    const u32 bias_lod_offset = has_bindless_reg ? 1 : 0;
+
+    // Cursor over the gpr20-based operands; one slot is skipped when a bindless register is
+    // supplied and one more when lod or bias is in use.
+    u64 operand_cursor = instr.gpr20.Value();
+    operand_cursor += has_bindless_reg ? 1 : 0;
+    operand_cursor += uses_lod_or_bias ? 1 : 0;
+
+    const std::size_t coord_count = std::get<0>(ValidateAndGetCoordinateElement(
+        texture_type, depth_compare, is_array, uses_lod_or_bias, 4, 5));
+
+    // If enabled arrays index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+    const u64 first_coord_register = is_array ? array_register + 1 : array_register;
+
+    std::vector<Node> coords;
+    for (std::size_t i = 0; i < coord_count; ++i) {
+        coords.push_back(GetRegister(first_coord_register + i));
+    }
+    // 1D.DC in OpenGL the 2nd component is ignored.
+    const bool is_plain_1d_shadow =
+        depth_compare && !is_array && texture_type == TextureType::Texture1D;
+    if (is_plain_1d_shadow) {
+        coords.push_back(Immediate(0.0f));
+    }
+
+    Node array;
+    if (is_array) {
+        array = GetRegister(array_register);
+    }
+
+    std::vector<Node> aoffi;
+    if (is_aoffi) {
+        const Node aoffi_source = GetRegister(operand_cursor);
+        ++operand_cursor;
+        aoffi = GetAoffiCoordinates(aoffi_source, coord_count, false);
+    }
+
+    Node dc;
+    if (depth_compare) {
+        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+        // or bias are used
+        dc = GetRegister(operand_cursor);
+    }
+
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
+                          aoffi, bindless_reg);
+}
+
+// Gathers the operands of a TEXS instruction and forwards them to GetTextureCode.
+//
+// TEXS spreads its operands over the gpr8 and gpr20 register groups: depending on the operand
+// mix, the last coordinate is read either from gpr20 or from the register following the first
+// coordinate. No AOFFI and no bindless register are passed on.
+Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
+                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
+    const bool uses_lod_or_bias =
+        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);
+
+    const std::size_t coord_count = std::get<0>(ValidateAndGetCoordinateElement(
+        texture_type, depth_compare, is_array, uses_lod_or_bias, 4, 4));
+
+    // If enabled arrays index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
+    const u64 first_coord_register = is_array ? array_register + 1 : array_register;
+
+    // The last coordinate is taken from gpr20 for arrays, for three or more coordinates, and
+    // whenever neither lod/bias nor depth compare is in use.
+    const bool last_coord_in_gpr20 =
+        is_array || !(uses_lod_or_bias || depth_compare) || (coord_count > 2);
+    const u64 last_coord_register = last_coord_in_gpr20 ? static_cast<u64>(instr.gpr20.Value())
+                                                        : first_coord_register + 1;
+    const u32 bias_offset = coord_count > 2 ? 1 : 0;
+
+    std::vector<Node> coords;
+    for (std::size_t i = 0; i < coord_count; ++i) {
+        // Only multi-coordinate accesses redirect their final coordinate.
+        const bool is_last = (coord_count > 1) && (i + 1 == coord_count);
+        const u64 source_register = is_last ? last_coord_register : first_coord_register + i;
+        coords.push_back(GetRegister(source_register));
+    }
+
+    Node array;
+    if (is_array) {
+        array = GetRegister(array_register);
+    }
+
+    Node dc;
+    if (depth_compare) {
+        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+        // or bias are used
+        const u64 depth_register = instr.gpr20.Value() + (uses_lod_or_bias ? 1 : 0);
+        dc = GetRegister(depth_register);
+    }
+
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
+                          {});
+}
+
+// Builds the four TextureGather nodes for a TLD4-family instruction.
+//
+// Coordinates come from consecutive registers starting at gpr8 (gpr8 + 1 when arrays are used).
+// The remaining operands are consumed in order from registers starting at gpr20: the sampler
+// register (bindless only), then either one AOFFI register or two PTP registers, then the depth
+// compare value. When no sampler can be resolved, four zero immediates are returned.
+Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
+                            bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
+    ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
+
+    const std::size_t coord_count = GetCoordCount(texture_type);
+
+    // If enabled arrays index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+    const u64 first_coord_register = is_array ? array_register + 1 : array_register;
+
+    std::vector<Node> coords;
+    for (std::size_t i = 0; i < coord_count; ++i) {
+        coords.push_back(GetRegister(first_coord_register + i));
+    }
+
+    // Cursor over the gpr20-based operands; advanced each time one is consumed.
+    u64 operand_cursor = instr.gpr20.Value();
+
+    SamplerInfo info;
+    info.type = texture_type;
+    info.is_array = is_array;
+    info.is_shadow = depth_compare;
+
+    Node index_var;
+    const std::optional<SamplerEntry> sampler = [&]() -> std::optional<SamplerEntry> {
+        if (!is_bindless) {
+            return GetSampler(instr.sampler, info);
+        }
+        // The bindless sampler register is the first gpr20-based operand.
+        const u64 sampler_register = operand_cursor;
+        ++operand_cursor;
+        return GetBindlessSampler(sampler_register, info, index_var);
+    }();
+
+    Node4 values;
+    if (!sampler) {
+        for (Node& value : values) {
+            value = Immediate(0);
+        }
+        return values;
+    }
+
+    std::vector<Node> aoffi;
+    std::vector<Node> ptp;
+    if (is_aoffi) {
+        const Node aoffi_source = GetRegister(operand_cursor);
+        ++operand_cursor;
+        aoffi = GetAoffiCoordinates(aoffi_source, coord_count, true);
+    } else if (is_ptp) {
+        // PTP offsets span two consecutive registers, read low register first.
+        const Node ptp_low = GetRegister(operand_cursor);
+        ++operand_cursor;
+        const Node ptp_high = GetRegister(operand_cursor);
+        ++operand_cursor;
+        ptp = GetPtpCoordinates({ptp_low, ptp_high});
+    }
+
+    Node dc;
+    if (depth_compare) {
+        dc = GetRegister(operand_cursor);
+    }
+
+    // The gathered component is encoded in a different field for the bindless variant.
+    const u32 component_index = is_bindless ? static_cast<u32>(instr.tld4_b.component)
+                                            : static_cast<u32>(instr.tld4.component);
+    const Node component = Immediate(component_index);
+
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto operands = coords;
+        MetaTexture meta{
+            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
+            index_var};
+        values[element] = Operation(OperationCode::TextureGather, meta, std::move(operands));
+    }
+
+    return values;
+}
+
+// Builds the four TexelFetch nodes for a TLD instruction.
+//
+// The array index (when the instruction is an array access) and the coordinates are read from
+// consecutive registers starting at gpr8. The lod is read from gpr20 when the process mode is LL
+// and is a zero immediate otherwise.
+//
+// GetSampler returns a std::optional; when it is empty, four zero immediates are returned instead
+// of dereferencing it, matching how GetTextureCode and GetTld4Code treat a missing sampler.
+Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
+    const auto texture_type{instr.tld.texture_type};
+    const bool is_array{instr.tld.is_array != 0};
+    const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
+    const std::size_t coord_count{GetCoordCount(texture_type)};
+
+    // Cursor over the gpr8-based operands: optional array index first, then the coordinates.
+    u64 gpr8_cursor{instr.gpr8.Value()};
+    const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
+
+    std::vector<Node> coords;
+    coords.reserve(coord_count);
+    for (std::size_t i = 0; i < coord_count; ++i) {
+        coords.push_back(GetRegister(gpr8_cursor++));
+    }
+
+    // Cursor over the gpr20-based operands. Only lod is decoded; the commented-out lines record
+    // the operands that are not handled here.
+    u64 gpr20_cursor{instr.gpr20.Value()};
+    // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
+    const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
+    // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
+    // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
+
+    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
+    if (!sampler) {
+        // Dereferencing an empty optional is undefined behavior; fall back to zeros.
+        return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
+    }
+
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto coords_copy = coords;
+        MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
+        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
+    }
+
+    return values;
+}
+
+// Builds the four TexelFetch nodes for a TLDS instruction.
+//
+// TLDS spreads its operands over the gpr8 and gpr20 register groups: for arrays gpr8 holds the
+// array index and the coordinates start at gpr20; otherwise the coordinates start at gpr8 and,
+// depending on the coordinate count and lod usage, the last one is read from gpr20. The lod is
+// read from gpr20 when the process mode is LL and is a zero immediate otherwise.
+//
+// GetSampler returns a std::optional; when it is empty, four zero immediates are returned instead
+// of dereferencing it, matching how GetTextureCode and GetTld4Code treat a missing sampler.
+Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
+    SamplerInfo info;
+    info.type = texture_type;
+    info.is_array = is_array;
+    info.is_shadow = false;
+    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
+    if (!sampler) {
+        // Dereferencing an empty optional is undefined behavior; fall back to zeros.
+        return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
+    }
+
+    const std::size_t type_coord_count = GetCoordCount(texture_type);
+    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
+
+    // If enabled arrays index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // if is array gpr20 is used
+    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
+
+    // Non-array accesses with three or more coordinates, or with two coordinates and no lod, read
+    // their last coordinate from gpr20; every other case reads it from coord_register + 1.
+    const u64 last_coord_register =
+        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
+            ? static_cast<u64>(instr.gpr20.Value())
+            : coord_register + 1;
+
+    std::vector<Node> coords;
+    for (std::size_t i = 0; i < type_coord_count; ++i) {
+        // Only multi-coordinate accesses redirect their final coordinate.
+        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
+        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
+    }
+
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+    // When lod is used always is in gpr20
+    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
+
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto coords_copy = coords;
+        MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
+        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
+    }
+    return values;
+}
+
+// Computes the coordinate counts of a texture operation and validates them against the limits.
+//
+// Returns {coordinates of the texture type alone, total coordinates including the array index
+// and the depth compare value}. When the total exceeds `max_coords`, or the total plus the
+// optional lod/bias input exceeds `max_inputs`, the operation is reported as unimplemented and
+// the total is clamped to `max_coords`.
+std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
+    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
+    std::size_t max_coords, std::size_t max_inputs) {
+    const std::size_t base_coords = GetCoordCount(texture_type);
+
+    std::size_t total_coords = base_coords;
+    if (is_array) {
+        ++total_coords;
+    }
+    if (depth_compare) {
+        ++total_coords;
+    }
+    const std::size_t total_inputs = lod_bias_enabled ? total_coords + 1 : total_coords;
+
+    const bool too_many_coords = total_coords > max_coords;
+    const bool too_many_inputs = total_inputs > max_inputs;
+    if (too_many_coords || too_many_inputs) {
+        UNIMPLEMENTED_MSG("Unsupported Texture operation");
+        if (too_many_coords) {
+            total_coords = max_coords;
+        }
+    }
+
+    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
+    const bool is_plain_1d_shadow =
+        depth_compare && !is_array && texture_type == TextureType::Texture1D;
+    if (is_plain_1d_shadow) {
+        ++total_coords;
+    }
+
+    return {base_coords, total_coords};
+}
+
+// Unpacks the per-coordinate texel offsets packed in an AOFFI register.
+//
+// Each offset is a bitfield of `aoffi_reg`: 6 bits wide at bit positions 0/8/16 for TLD4, 4 bits
+// wide at bit positions 0/4/8 otherwise. Field values at or above half the field range are
+// mapped to negative offsets by subtracting the full range. When the register can be tracked to
+// an immediate, the offsets are emitted as immediates; otherwise equivalent IR is emitted to
+// decode them at runtime.
+std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
+                                                bool is_tld4) {
+    static constexpr std::array<u32, 3> tld4_offsets{0U, 8U, 16U};
+    static constexpr std::array<u32, 3> default_offsets{0U, 4U, 8U};
+    const std::array<u32, 3>& field_offsets = is_tld4 ? tld4_offsets : default_offsets;
+
+    const u32 field_bits = is_tld4 ? 6 : 4;
+    const u32 field_mask = (1U << field_bits) - 1;
+    // First field value that represents a negative offset, and the amount subtracted from it.
+    const s32 first_negative = is_tld4 ? 32 : 8;
+    const s32 field_range = is_tld4 ? 64 : 16;
+
+    std::vector<Node> aoffi;
+    aoffi.reserve(coord_count);
+
+    const auto aoffi_immediate{
+        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
+    if (aoffi_immediate) {
+        // Constant offsets: decode them right here.
+        for (std::size_t coord = 0; coord < coord_count; ++coord) {
+            const s32 field = (*aoffi_immediate >> field_offsets[coord]) & field_mask;
+            aoffi.push_back(Immediate(field >= first_negative ? field - field_range : field));
+        }
+        return aoffi;
+    }
+
+    // Variable access, not supported on AMD.
+    LOG_WARNING(HW_GPU,
+                "AOFFI constant folding failed, some hardware might have graphical issues");
+    for (std::size_t coord = 0; coord < coord_count; ++coord) {
+        const Node field = BitfieldExtract(aoffi_reg, field_offsets[coord], field_bits);
+        const Node is_negative =
+            Operation(OperationCode::LogicalIGreaterEqual, field, Immediate(first_negative));
+        const Node wrapped = Operation(OperationCode::IAdd, field, Immediate(-field_range));
+        aoffi.push_back(Operation(OperationCode::Select, is_negative, wrapped, field));
+    }
+    return aoffi;
+}
+
+// Unpacks the eight PTP offsets packed in a pair of registers.
+//
+// Each register contributes four offsets, one per byte, of which the low 6 bits are used. Field
+// values of 32 and above are mapped to negative offsets by subtracting 64. When both registers
+// can be tracked to immediates, the offsets are emitted as immediates; otherwise equivalent IR is
+// emitted to decode them at runtime.
+std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
+    static constexpr u32 num_registers = 2;
+    static constexpr u32 entries_per_register = 4;
+    static constexpr u32 num_entries = num_registers * entries_per_register;
+
+    std::vector<Node> ptp;
+    ptp.reserve(num_entries);
+
+    const auto global_size = static_cast<s64>(global_code.size());
+    const auto low = TrackImmediate(ptp_regs[0], global_code, global_size);
+    const auto high = TrackImmediate(ptp_regs[1], global_code, global_size);
+
+    if (low && high) {
+        // Constant offsets: treat both registers as one 64-bit value and decode it right here.
+        const u64 packed = static_cast<u64>(*low) | (static_cast<u64>(*high) << 32);
+        for (u32 entry = 0; entry < num_entries; ++entry) {
+            const s32 field = static_cast<s32>((packed >> (entry * 8)) & 0x3F);
+            ptp.push_back(Immediate(field >= 32 ? field - 64 : field));
+        }
+        return ptp;
+    }
+
+    // At least one register is not a known constant: decode every entry at runtime.
+    for (u32 reg = 0; reg < num_registers; ++reg) {
+        for (u32 slot = 0; slot < entries_per_register; ++slot) {
+            const Node field = BitfieldExtract(ptp_regs[reg], slot * 8, 6);
+            const Node is_negative =
+                Operation(OperationCode::LogicalIGreaterEqual, field, Immediate(32));
+            const Node wrapped = Operation(OperationCode::IAdd, field, Immediate(-64));
+            ptp.push_back(Operation(OperationCode::Select, is_negative, wrapped, field));
+        }
+    }
+    return ptp;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -0,0 +1,169 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using std::move;
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::VideoType;
+using Tegra::Shader::VmadShr;
+using Tegra::Shader::VmnmxOperation;
+using Tegra::Shader::VmnmxType;
+
+// Decodes the video instruction at `pc` (VMNMX, VMAD or VSETP) into `bb`.
+//
+// VMNMX is handed to DecodeVMNMX. For the others, operand A comes from gpr8 and operand B from
+// either gpr20 or a 16-bit immediate, both narrowed through GetVideoOperand when they are
+// registers. Any other opcode is reported as unimplemented. Returns `pc` unchanged.
+u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    const auto opcode_id = opcode->get().GetId();
+
+    // VMNMX fetches its own operands, so dispatch it before the shared operand setup.
+    if (opcode_id == OpCode::Id::VMNMX) {
+        DecodeVMNMX(bb, instr);
+        return pc;
+    }
+
+    const Node op_a =
+        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
+                        instr.video.type_a, instr.video.byte_height_a);
+
+    Node op_b;
+    if (instr.video.use_register_b) {
+        op_b = GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
+                               instr.video.signed_b, instr.video.type_b,
+                               instr.video.byte_height_b);
+    } else if (instr.video.signed_b) {
+        // Go through s16 first so the immediate is sign-extended to 32 bits.
+        const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
+        op_b = Immediate(static_cast<u32>(imm));
+    } else {
+        op_b = Immediate(instr.alu.GetImm20_16());
+    }
+
+    switch (opcode_id) {
+    case OpCode::Id::VMAD: {
+        // The result is signed as soon as either source operand is.
+        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
+        const Node op_c = GetRegister(instr.gpr39);
+
+        const Node product =
+            SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
+        Node value =
+            SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, product, op_c);
+
+        const bool shift_by_7 = instr.vmad.shr == VmadShr::Shr7;
+        const bool shift_by_15 = instr.vmad.shr == VmadShr::Shr15;
+        if (shift_by_7 || shift_by_15) {
+            const Node shift = Immediate(shift_by_7 ? 7 : 15);
+            value =
+                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
+        }
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    case OpCode::Id::VSETP: {
+        // We can't use the constant predicate as destination.
+        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+        const bool is_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
+        const Node comparison =
+            GetPredicateComparisonInteger(instr.vsetp.cond, is_signed, op_a, op_b);
+        const Node extra_pred = GetPredicate(instr.vsetp.pred39, false);
+
+        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
+
+        // Set the primary predicate to the result of Predicate OP SecondPredicate
+        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, comparison, extra_pred));
+
+        const bool has_secondary = instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
+        if (has_secondary) {
+            // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+            // if enabled
+            const Node negated = Operation(OperationCode::LogicalNegate, comparison);
+            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negated, extra_pred));
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+// Narrows a 32-bit video operand to the part selected by the instruction.
+//
+// Without `is_chunk`, the 8 bits starting at bit `byte_height * 8` are extracted. With it, `type`
+// selects the low or high 16 bits; the 32-bit and invalid types are reported and yield a zero
+// immediate. `is_signed` is not consulted here.
+Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
+                               u64 byte_height) {
+    if (is_chunk) {
+        switch (type) {
+        case VideoType::Size16_Low:
+            return BitfieldExtract(op, 0, 16);
+        case VideoType::Size16_High:
+            return BitfieldExtract(op, 16, 16);
+        case VideoType::Size32:
+            // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
+            // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
+            UNIMPLEMENTED();
+            return Immediate(0);
+        case VideoType::Invalid:
+            UNREACHABLE_MSG("Invalid instruction encoding");
+            return Immediate(0);
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }
+
+    // Byte selection: one byte at the requested byte position.
+    const u32 bit_offset = static_cast<u32>(byte_height * 8);
+    return BitfieldExtract(op, bit_offset, 8);
+}
+
+// Decodes a VMNMX instruction into `bb`.
+//
+// The first stage takes the min or max (selected by the `mx` bit) of gpr8 and gpr20. The second
+// stage combines that result with gpr39 according to the instruction's operation (merge into a
+// bitfield of gpr39, accumulate, min, max, or nothing) and the outcome is written to gpr0.
+// Immediate B operands, non-32-bit sources, mixed source signedness, saturation and condition
+// code generation are reported as unimplemented.
+void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
+    UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
+    UNIMPLEMENTED_IF(instr.vmnmx.sat);
+    UNIMPLEMENTED_IF(instr.generates_cc);
+
+    Node op_a = GetRegister(instr.gpr8);
+    Node op_b = GetRegister(instr.gpr20);
+    Node op_c = GetRegister(instr.gpr39);
+
+    const bool first_stage_signed = instr.vmnmx.is_src_a_signed; // Stubbed
+    const bool second_stage_signed = instr.vmnmx.is_dest_signed;
+
+    // First stage: min or max of the two sources.
+    const OperationCode first_stage_op =
+        instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
+    Node value =
+        SignedOperation(first_stage_op, first_stage_signed, std::move(op_a), std::move(op_b));
+
+    // Replaces `bits` bits of op_c, starting at `offset`, with the first stage result.
+    const auto merge_into_c = [&op_c, &value](u32 offset, u32 bits) {
+        value = BitfieldInsert(std::move(op_c), std::move(value), offset, bits);
+    };
+
+    // Second stage: combine with the third operand.
+    switch (instr.vmnmx.operation) {
+    case VmnmxOperation::Mrg_16H:
+        merge_into_c(16, 16);
+        break;
+    case VmnmxOperation::Mrg_16L:
+        merge_into_c(0, 16);
+        break;
+    case VmnmxOperation::Mrg_8B0:
+        merge_into_c(0, 8);
+        break;
+    case VmnmxOperation::Mrg_8B2:
+        merge_into_c(16, 8);
+        break;
+    case VmnmxOperation::Acc:
+        value = Operation(OperationCode::IAdd, std::move(value), std::move(op_c));
+        break;
+    case VmnmxOperation::Min:
+        value = SignedOperation(OperationCode::IMin, second_stage_signed, std::move(value),
+                                std::move(op_c));
+        break;
+    case VmnmxOperation::Max:
+        value = SignedOperation(OperationCode::IMax, second_stage_signed, std::move(value),
+                                std::move(op_c));
+        break;
+    case VmnmxOperation::Nop:
+        break;
+    default:
+        UNREACHABLE();
+        break;
+    }
+
+    SetRegister(bb, instr.gpr0, std::move(value));
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -0,0 +1,117 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::ShuffleOperation;
+using Tegra::Shader::VoteOperation;
+
+namespace {
+
+// Maps a shader VOTE operation to the IR operation that implements it.
+// Unknown operations are reported and mapped to VoteAll.
+OperationCode GetOperationCode(VoteOperation vote_op) {
+    if (vote_op == VoteOperation::All) {
+        return OperationCode::VoteAll;
+    }
+    if (vote_op == VoteOperation::Any) {
+        return OperationCode::VoteAny;
+    }
+    if (vote_op == VoteOperation::Eq) {
+        return OperationCode::VoteEqual;
+    }
+    UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
+    return OperationCode::VoteAll;
+}
+
+} // Anonymous namespace
+
+// Decodes the warp instruction at `pc` (VOTE, SHFL or FSWZADD) into `bb`.
+//
+// Decoding any instruction through this function sets `uses_warps`. Any other opcode is reported
+// as unimplemented. Returns `pc` unchanged.
+u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    // Signal the backend that this shader uses warp instructions.
+    uses_warps = true;
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::VOTE: {
+        // gpr0 receives the BallotThread result of the (optionally negated) source predicate and
+        // the destination predicate receives the selected vote operation over the same value.
+        const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
+        const Node active = Operation(OperationCode::BallotThread, value);
+        const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
+        SetRegister(bb, instr.gpr0, active);
+        SetPredicate(bb, instr.vote.dest_pred, vote);
+        break;
+    }
+    case OpCode::Id::SHFL: {
+        // Mask and index each come either from an immediate field or from a register.
+        Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+                                           : GetRegister(instr.gpr39);
+        Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+                                             : GetRegister(instr.gpr20);
+
+        // The mask packs two fields: the clamp value in its low 5 bits and the segment mask
+        // extracted with BitfieldExtract(mask, 8, 16).
+        Node thread_id = Operation(OperationCode::ThreadId);
+        Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
+        Node seg_mask = BitfieldExtract(mask, 8, 16);
+
+        // Bounds of the source thread range:
+        //   min_thread_id = thread_id & seg_mask
+        //   max_thread_id = min_thread_id | (clamp & ~seg_mask)
+        Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
+        Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
+        Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
+                                       Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
+
+        // Source thread selected by the shuffle mode:
+        //   Idx:  (index & ~seg_mask) | min_thread_id
+        //   Down: thread_id + index
+        //   Up:   thread_id - index
+        //   Bfly: thread_id ^ index
+        Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
+            switch (instr.shfl.operation) {
+            case ShuffleOperation::Idx:
+                return Operation(OperationCode::IBitwiseOr,
+                                 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
+                                 min_thread_id);
+            case ShuffleOperation::Down:
+                return Operation(OperationCode::IAdd, thread_id, index);
+            case ShuffleOperation::Up:
+                return Operation(OperationCode::IAdd, thread_id,
+                                 Operation(OperationCode::INegate, index));
+            case ShuffleOperation::Bfly:
+                return Operation(OperationCode::IBitwiseXor, thread_id, index);
+            }
+            UNREACHABLE();
+            return Immediate(0U);
+        }();
+
+        // Up checks the source against the lower bound, every other mode against the upper bound.
+        Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
+            if (instr.shfl.operation == ShuffleOperation::Up) {
+                return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
+            } else {
+                return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
+            }
+        }();
+
+        // The predicate reports the bounds check; gpr0 receives gpr8 shuffled from the source
+        // thread.
+        SetPredicate(bb, instr.shfl.pred48, in_bounds);
+        SetRegister(
+            bb, instr.gpr0,
+            Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
+        break;
+    }
+    case OpCode::Id::FSWZADD: {
+        UNIMPLEMENTED_IF(instr.fswzadd.ndv);
+
+        // gpr0 = FSwizzleAdd(gpr8, gpr20, swizzle), with the swizzle taken from the instruction.
+        Node op_a = GetRegister(instr.gpr8);
+        Node op_b = GetRegister(instr.gpr20);
+        Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
+        SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
+        break;
+    }
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -0,0 +1,156 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
+
+u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {  // Emits IR for the XMAD at pc; returns pc.
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Signed operands and condition-code generation are flagged as unimplemented.
+    UNIMPLEMENTED_IF(instr.xmad.sign_a);
+    UNIMPLEMENTED_IF(instr.xmad.sign_b);
+    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                         "Condition codes generation in XMAD is not implemented");
+
+    Node op_a = GetRegister(instr.gpr8);  // Operand A always comes from gpr8.
+
+    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed; mixed signedness is unsupported.
+    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
+    const bool is_signed_a = instr.xmad.sign_a == 1;
+    const bool is_signed_b = instr.xmad.sign_b == 1;
+    const bool is_signed_c = is_signed_a;  // Operand C reuses A's signedness.
+    // Pick the per-encoding flag fields and the sources of operands B and C.
+    auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
+          op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::XMAD_CR:  // B: const buffer, C: gpr39
+            return {instr.xmad.merge_56,
+                    instr.xmad.product_shift_left_second,
+                    instr.xmad.high_b,
+                    instr.xmad.mode_cbf,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+                    GetRegister(instr.gpr39)};
+        case OpCode::Id::XMAD_RR:  // B: gpr20, C: gpr39
+            return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
+                    instr.xmad.mode,     GetRegister(instr.gpr20),      GetRegister(instr.gpr39)};
+        case OpCode::Id::XMAD_RC:  // B: gpr39, C: const buffer; merge and shift are disabled
+            return {false,
+                    false,
+                    instr.xmad.high_b,
+                    instr.xmad.mode_cbf,
+                    GetRegister(instr.gpr39),
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+        case OpCode::Id::XMAD_IMM:  // B: imm20_16 immediate (low half only), C: gpr39
+            return {instr.xmad.merge_37,
+                    instr.xmad.product_shift_left,
+                    false,
+                    instr.xmad.mode,
+                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
+                    GetRegister(instr.gpr39)};
+        default:
+            UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
+            return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
+        }
+    }();
+    // Reduce A and B to the 16-bit half selected by high_a / is_high_b.
+    op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
+                           instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
+
+    const Node original_b = op_b_binding;  // Full-width B, needed by CBcc and by merge.
+    const Node op_b =
+        SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
+                        is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
+
+    // sign_a and sign_b were already checked to be equal above, so one flag covers the product.
+    Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
+    if (is_psl) {
+        product =
+            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
+    }
+    SetTemporary(bb, 0, product);
+    product = GetTemporary(0);
+    // Adjust operand C according to the XMAD mode before it is added to the product.
+    Node original_c = op_c;
+    const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
+    op_c = [&] {
+        switch (set_mode) {
+        case Tegra::Shader::XmadMode::None:  // C is used unchanged
+            return original_c;
+        case Tegra::Shader::XmadMode::CLo:  // low 16 bits of C
+            return BitfieldExtract(std::move(original_c), 0, 16);
+        case Tegra::Shader::XmadMode::CHi:  // high 16 bits of C
+            return BitfieldExtract(std::move(original_c), 16, 16);
+        case Tegra::Shader::XmadMode::CBcc: {  // C + (full-width B << 16)
+            Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
+                                             original_b, Immediate(16));
+            return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
+                                   std::move(shifted_b));
+        }
+        case Tegra::Shader::XmadMode::CSfu: {
+            const Node comp_a =
+                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
+            const Node comp_b =
+                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
+            const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
+            // Bit 31 of each extracted operand decides whether 65536 is subtracted from C.
+            const Node comp_minus_a = GetPredicateComparisonInteger(
+                PredCondition::NE, is_signed_a,
+                SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
+                                Immediate(0x80000000)),
+                Immediate(0));
+            const Node comp_minus_b = GetPredicateComparisonInteger(
+                PredCondition::NE, is_signed_b,
+                SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
+                                Immediate(0x80000000)),
+                Immediate(0));
+            // Apply the -65536 adjustment once per flagged operand.
+            Node new_c = Operation(
+                OperationCode::Select, comp_minus_a,
+                SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
+                original_c);
+            new_c = Operation(
+                OperationCode::Select, comp_minus_b,
+                SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
+                std::move(new_c));
+            // If either extracted operand is zero, C is passed through unchanged.
+            return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
+        }
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }();
+
+    SetTemporary(bb, 1, op_c);
+    op_c = GetTemporary(1);
+
+    // TODO(Rodrigo): Use an appropriate sign for this operation
+    Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
+    SetTemporary(bb, 2, sum);
+    sum = GetTemporary(2);
+    if (is_merge) {  // Result becomes (low 16 bits of sum) | (full-width B << 16).
+        const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
+                                       Immediate(0), Immediate(16));
+        const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
+                                       Immediate(16));
+        sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
+    }
+
+    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
+    SetRegister(bb, instr.gpr0, std::move(sum));
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader