From 2d48a7b4d0666ad16d03a22d85712617a0849046 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 9 Jan 2021 03:30:07 -0300
Subject: shader: Initial recompiler work

---
 .../frontend/ir/microinstruction.cpp               | 189 +++++++++++++++++++++
 1 file changed, 189 insertions(+)
 create mode 100644 src/shader_recompiler/frontend/ir/microinstruction.cpp

diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
new file mode 100644
index 0000000000..553fec3b7f
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -0,0 +1,189 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/type.h"
+
+namespace Shader::IR {
+
+static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
+    if (inst && inst->Opcode() != opcode) {
+        throw LogicError("Invalid pseudo-instruction");
+    }
+}
+
+static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
+    if (dest_inst) {
+        throw LogicError("Only one of each type of pseudo-op allowed");
+    }
+    dest_inst = pseudo_inst;
+}
+
+static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
+    if (inst->Opcode() != expected_opcode) {
+        throw LogicError("Undoing use of invalid pseudo-op");
+    }
+    inst = nullptr;
+}
+
+bool Inst::MayHaveSideEffects() const noexcept {
+    switch (op) {
+    case Opcode::SetAttribute:
+    case Opcode::SetAttributeIndexed:
+    case Opcode::WriteGlobalU8:
+    case Opcode::WriteGlobalS8:
+    case Opcode::WriteGlobalU16:
+    case Opcode::WriteGlobalS16:
+    case Opcode::WriteGlobal32:
+    case Opcode::WriteGlobal64:
+    case Opcode::WriteGlobal128:
+        return true;
+    default:
+        return false;
+    }
+}
+
+bool Inst::IsPseudoInstruction() const noexcept {
+    switch (op) {
+    case Opcode::GetZeroFromOp:
+    case Opcode::GetSignFromOp:
+    case Opcode::GetCarryFromOp:
+    case Opcode::GetOverflowFromOp:
+    case Opcode::GetZSCOFromOp:
+        return true;
+    default:
+        return false;
+    }
+}
+
+bool Inst::HasAssociatedPseudoOperation() const noexcept {
+    return zero_inst || sign_inst || carry_inst || overflow_inst || zsco_inst;
+}
+
+Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
+    // This is faster than doing a search through the block.
+    switch (opcode) {
+    case Opcode::GetZeroFromOp:
+        CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp);
+        return zero_inst;
+    case Opcode::GetSignFromOp:
+        CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp);
+        return sign_inst;
+    case Opcode::GetCarryFromOp:
+        CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp);
+        return carry_inst;
+    case Opcode::GetOverflowFromOp:
+        CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp);
+        return overflow_inst;
+    case Opcode::GetZSCOFromOp:
+        CheckPseudoInstruction(zsco_inst, Opcode::GetZSCOFromOp);
+        return zsco_inst;
+    default:
+        throw InvalidArgument("{} is not a pseudo-instruction", opcode);
+    }
+}
+
+size_t Inst::NumArgs() const {
+    return NumArgsOf(op);
+}
+
+IR::Type Inst::Type() const {
+    return TypeOf(op);
+}
+
+Value Inst::Arg(size_t index) const {
+    if (index >= NumArgsOf(op)) {
+        throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
+    }
+    return args[index];
+}
+
+void Inst::SetArg(size_t index, Value value) {
+    if (index >= NumArgsOf(op)) {
+        throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
+    }
+    if (!args[index].IsImmediate()) {
+        UndoUse(args[index]);
+    }
+    if (!value.IsImmediate()) {
+        Use(value);
+    }
+    args[index] = value;
+}
+
+void Inst::Invalidate() {
+    ClearArgs();
+    op = Opcode::Void;
+}
+
+void Inst::ClearArgs() {
+    for (auto& value : args) {
+        if (!value.IsImmediate()) {
+            UndoUse(value);
+        }
+        value = {};
+    }
+}
+
+void Inst::ReplaceUsesWith(Value replacement) {
+    Invalidate();
+
+    op = Opcode::Identity;
+
+    if (!replacement.IsImmediate()) {
+        Use(replacement);
+    }
+    args[0] = replacement;
+}
+
+void Inst::Use(const Value& value) {
+    ++value.Inst()->use_count;
+
+    switch (op) {
+    case Opcode::GetZeroFromOp:
+        SetPseudoInstruction(value.Inst()->zero_inst, this);
+        break;
+    case Opcode::GetSignFromOp:
+        SetPseudoInstruction(value.Inst()->sign_inst, this);
+        break;
+    case Opcode::GetCarryFromOp:
+        SetPseudoInstruction(value.Inst()->carry_inst, this);
+        break;
+    case Opcode::GetOverflowFromOp:
+        SetPseudoInstruction(value.Inst()->overflow_inst, this);
+        break;
+    case Opcode::GetZSCOFromOp:
+        SetPseudoInstruction(value.Inst()->zsco_inst, this);
+        break;
+    default:
+        break;
+    }
+}
+
+void Inst::UndoUse(const Value& value) {
+    --value.Inst()->use_count;
+
+    switch (op) {
+    case Opcode::GetZeroFromOp:
+        RemovePseudoInstruction(value.Inst()->zero_inst, Opcode::GetZeroFromOp);
+        break;
+    case Opcode::GetSignFromOp:
+        RemovePseudoInstruction(value.Inst()->sign_inst, Opcode::GetSignFromOp);
+        break;
+    case Opcode::GetCarryFromOp:
+        RemovePseudoInstruction(value.Inst()->carry_inst, Opcode::GetCarryFromOp);
+        break;
+    case Opcode::GetOverflowFromOp:
+        RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp);
+        break;
+    case Opcode::GetZSCOFromOp:
+        RemovePseudoInstruction(value.Inst()->zsco_inst, Opcode::GetZSCOFromOp);
+        break;
+    default:
+        break;
+    }
+}
+
+} // namespace Shader::IR
-- 
cgit v1.2.3-70-g09d2
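
The bookkeeping in Use()/UndoUse() is the core trick of this first patch: each
producing instruction carries one back-pointer per pseudo-op kind (zero, sign,
carry, overflow, ZSCO), maintained as uses are added and removed, so
GetAssociatedPseudoOperation() resolves a flag consumer in O(1) instead of
scanning the block. A minimal standalone sketch of the same technique, with toy
types rather than yuzu's actual IR classes:

    #include <stdexcept>

    enum class Opcode { IAdd32, GetZeroFromOp, GetCarryFromOp };

    struct Inst {
        Opcode op;
        // One slot per pseudo-op kind, filled when a consumer starts using us.
        Inst* zero_inst{};
        Inst* carry_inst{};
    };

    // Mirrors Inst::Use: record the flag consumer on the producing instruction.
    void Use(Inst& producer, Inst& consumer) {
        Inst*& slot{consumer.op == Opcode::GetZeroFromOp ? producer.zero_inst
                                                         : producer.carry_inst};
        if (slot) {
            throw std::logic_error("Only one of each type of pseudo-op allowed");
        }
        slot = &consumer;
    }

    int main() {
        Inst add{Opcode::IAdd32};
        Inst carry{Opcode::GetCarryFromOp};
        Use(add, carry);                         // associate the pseudo-op
        return add.carry_inst == &carry ? 0 : 1; // O(1) lookup, no block scan
    }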


From 6c4cc0cd062fbbba5349da1108d3c23cb330ca8a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 2 Feb 2021 21:07:00 -0300
Subject: shader: SSA and dominance

---
 src/shader_recompiler/CMakeLists.txt               |   3 +
 src/shader_recompiler/frontend/ir/basic_block.cpp  |  51 +++++--
 src/shader_recompiler/frontend/ir/basic_block.h    |  20 ++-
 src/shader_recompiler/frontend/ir/function.cpp     |   5 +
 src/shader_recompiler/frontend/ir/function.h       |  25 ++++
 .../frontend/ir/microinstruction.cpp               |  22 +++
 .../frontend/ir/microinstruction.h                 |  10 ++
 src/shader_recompiler/frontend/ir/opcode.inc       |   8 ++
 src/shader_recompiler/frontend/ir/pred.h           |   7 +
 src/shader_recompiler/frontend/ir/reg.h            |   9 +-
 src/shader_recompiler/frontend/ir/value.cpp        |  37 +++++
 src/shader_recompiler/frontend/ir/value.h          |   3 +
 .../frontend/maxwell/control_flow.cpp              | 130 ++++++++++++++++-
 .../frontend/maxwell/control_flow.h                |  44 +++++-
 src/shader_recompiler/frontend/maxwell/program.cpp |  75 +++++-----
 src/shader_recompiler/frontend/maxwell/program.h   |  11 +-
 .../frontend/maxwell/termination_code.cpp          |   7 +
 .../frontend/maxwell/termination_code.h            |   1 +
 .../frontend/maxwell/translate/impl/impl.h         |   4 +-
 .../maxwell/translate/impl/not_implemented.cpp     |   6 +-
 .../ir_opt/identity_removal_pass.cpp               |   1 -
 src/shader_recompiler/ir_opt/passes.h              |   9 ++
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  | 155 +++++++++++++++++++++
 src/shader_recompiler/main.cpp                     |   4 +-
 24 files changed, 570 insertions(+), 77 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/ir/function.cpp
 create mode 100644 src/shader_recompiler/frontend/ir/function.h
 create mode 100644 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index c65846bc44..36a61f21ac 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -11,6 +11,8 @@ add_executable(shader_recompiler
     frontend/ir/condition.h
     frontend/ir/flow_test.cpp
     frontend/ir/flow_test.h
+    frontend/ir/function.cpp
+    frontend/ir/function.h
     frontend/ir/ir_emitter.cpp
     frontend/ir/ir_emitter.h
     frontend/ir/microinstruction.cpp
@@ -51,6 +53,7 @@ add_executable(shader_recompiler
     ir_opt/get_set_elimination_pass.cpp
     ir_opt/identity_removal_pass.cpp
     ir_opt/passes.h
+    ir_opt/ssa_rewrite_pass.cpp
     ir_opt/verification_pass.cpp
     main.cpp
 )
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index 0406726ad9..e795618fcf 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -37,6 +37,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
     return result_it;
 }
 
+void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) {
+    imm_predecessors.push_back(immediate_predecessor);
+}
+
 u32 Block::LocationBegin() const noexcept {
     return location_begin;
 }
@@ -53,6 +57,18 @@ const Block::InstructionList& Block::Instructions() const noexcept {
     return instructions;
 }
 
+std::span<IR::Block* const> Block::ImmediatePredecessors() const noexcept {
+    return imm_predecessors;
+}
+
+static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
+                                Block* block) {
+    if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) {
+        return fmt::format("{{Block ${}}}", it->second);
+    }
+    return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
+}
+
 static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index,
                               const std::map<const Inst*, size_t>& inst_to_index,
                               const Value& arg) {
@@ -60,10 +76,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind
         return "<null>";
     }
     if (arg.IsLabel()) {
-        if (const auto it{block_to_index.find(arg.Label())}; it != block_to_index.end()) {
-            return fmt::format("{{Block ${}}}", it->second);
-        }
-        return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(arg.Label()));
+        return BlockToIndex(block_to_index, arg.Label());
     }
     if (!arg.IsImmediate()) {
         if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) {
@@ -115,16 +128,26 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
         } else {
             ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
         }
-        const size_t arg_count{NumArgsOf(op)};
-        for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
-            const Value arg{inst.Arg(arg_index)};
-            ret += arg_index != 0 ? ", " : " ";
-            ret += ArgToIndex(block_to_index, inst_to_index, arg);
-
-            const Type actual_type{arg.Type()};
-            const Type expected_type{ArgTypeOf(op, arg_index)};
-            if (!AreTypesCompatible(actual_type, expected_type)) {
-                ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
+        if (op == Opcode::Phi) {
+            size_t val_index{0};
+            for (const auto& [phi_block, phi_val] : inst.PhiOperands()) {
+                ret += val_index != 0 ? ", " : " ";
+                ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val),
+                                   BlockToIndex(block_to_index, phi_block));
+                ++val_index;
+            }
+        } else {
+            const size_t arg_count{NumArgsOf(op)};
+            for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
+                const Value arg{inst.Arg(arg_index)};
+                ret += arg_index != 0 ? ", " : " ";
+                ret += ArgToIndex(block_to_index, inst_to_index, arg);
+
+                const Type actual_type{arg.Type()};
+                const Type expected_type{ArgTypeOf(op, arg_index)};
+                if (!AreTypesCompatible(actual_type, expected_type)) {
+                    ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
+                }
             }
         }
         if (TypeOf(op) != Type::Void) {
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index 3ed2eb9571..4b6b80c4b2 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -6,6 +6,8 @@
 
 #include <initializer_list>
 #include <map>
+#include <span>
+#include <vector>
 
 #include <boost/intrusive/list.hpp>
 #include <boost/pool/pool_alloc.hpp>
@@ -36,7 +38,11 @@ public:
     void AppendNewInst(Opcode op, std::initializer_list<Value> args);
 
     /// Prepends a new instruction to this basic block before the insertion point.
-    iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args);
+    iterator PrependNewInst(iterator insertion_point, Opcode op,
+                            std::initializer_list<Value> args = {});
+
+    /// Adds a new immediate predecessor to the basic block.
+    void AddImmediatePredecessor(IR::Block* immediate_predecessor);
 
     /// Gets the starting location of this basic block.
     [[nodiscard]] u32 LocationBegin() const noexcept;
@@ -44,9 +50,12 @@ public:
     [[nodiscard]] u32 LocationEnd() const noexcept;
 
     /// Gets a mutable reference to the instruction list for this basic block.
-    InstructionList& Instructions() noexcept;
+    [[nodiscard]] InstructionList& Instructions() noexcept;
     /// Gets an immutable reference to the instruction list for this basic block.
-    const InstructionList& Instructions() const noexcept;
+    [[nodiscard]] const InstructionList& Instructions() const noexcept;
+
+    /// Gets an immutable span to the immediate predecessors.
+    [[nodiscard]] std::span<IR::Block* const> ImmediatePredecessors() const noexcept;
 
     [[nodiscard]] bool empty() const {
         return instructions.empty();
@@ -115,13 +124,16 @@ private:
     /// End location of this block
     u32 location_end;
 
-    /// List of instructions in this block.
+    /// List of instructions in this block
     InstructionList instructions;
 
     /// Memory pool for instruction list
     boost::fast_pool_allocator<Inst, boost::default_user_allocator_malloc_free,
                                boost::details::pool::null_mutex>
         instruction_alloc_pool;
+
+    /// Block immediate predecessors
+    std::vector<IR::Block*> imm_predecessors;
 };
 
 [[nodiscard]] std::string DumpBlock(const Block& block);
diff --git a/src/shader_recompiler/frontend/ir/function.cpp b/src/shader_recompiler/frontend/ir/function.cpp
new file mode 100644
index 0000000000..d1fc9461d0
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/function.cpp
@@ -0,0 +1,5 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/function.h"
diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h
new file mode 100644
index 0000000000..2d4dc5b981
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/function.h
@@ -0,0 +1,25 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::IR {
+
+struct Function {
+    struct InplaceDelete {
+        void operator()(IR::Block* block) const noexcept {
+            std::destroy_at(block);
+        }
+    };
+    using UniqueBlock = std::unique_ptr<IR::Block, InplaceDelete>;
+
+    std::vector<UniqueBlock> blocks;
+};
+
+} // namespace Shader::IR
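
InplaceDelete exists because blocks are carved out of a pool allocator owned by
Program (see the program.cpp changes below): the unique_ptr must run the
destructor but must not free the storage, which the pool reclaims wholesale. A
minimal sketch of that placement pattern, using a plain heap slab as a stand-in
for boost::pool_allocator:

    #include <memory>
    #include <vector>

    struct Block {};

    struct InplaceDelete {
        void operator()(Block* block) const noexcept {
            std::destroy_at(block); // destroy only; the pool owns the memory
        }
    };
    using UniqueBlock = std::unique_ptr<Block, InplaceDelete>;

    int main() {
        // Stand-in for the pool: one contiguous slab for every block.
        void* const slab{::operator new(sizeof(Block) * 2)};
        Block* const memory{static_cast<Block*>(slab)};

        std::vector<UniqueBlock> blocks;
        blocks.emplace_back(std::construct_at(memory));
        blocks.emplace_back(std::construct_at(memory + 1));

        blocks.clear();          // runs destructors, leaves the slab alive
        ::operator delete(slab); // the "pool" releases the slab once
    }
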
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 553fec3b7f..ecf76e23d8 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -30,6 +30,11 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode)
 
 bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
+    case Opcode::Branch:
+    case Opcode::BranchConditional:
+    case Opcode::Exit:
+    case Opcode::Return:
+    case Opcode::Unreachable:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
     case Opcode::WriteGlobalU8:
@@ -113,6 +118,17 @@ void Inst::SetArg(size_t index, Value value) {
     args[index] = value;
 }
 
+std::span<const std::pair<Block*, Value>> Inst::PhiOperands() const noexcept {
+    return phi_operands;
+}
+
+void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
+    if (!value.IsImmediate()) {
+        Use(value);
+    }
+    phi_operands.emplace_back(predecessor, value);
+}
+
 void Inst::Invalidate() {
     ClearArgs();
     op = Opcode::Void;
@@ -125,6 +141,12 @@ void Inst::ClearArgs() {
         }
         value = {};
     }
+    for (auto& [phi_block, phi_op] : phi_operands) {
+        if (!phi_op.IsImmediate()) {
+            UndoUse(phi_op);
+        }
+    }
+    phi_operands.clear();
 }
 
 void Inst::ReplaceUsesWith(Value replacement) {
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 43460b9505..7f1ed6710c 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -5,6 +5,8 @@
 #pragma once
 
 #include <array>
+#include <span>
+#include <vector>
 
 #include <boost/intrusive/list.hpp>
 
@@ -15,6 +17,8 @@
 
 namespace Shader::IR {
 
+class Block;
+
 constexpr size_t MAX_ARG_COUNT = 4;
 
 class Inst : public boost::intrusive::list_base_hook<> {
@@ -59,6 +63,11 @@ public:
     /// Set the value of a given argument index.
     void SetArg(size_t index, Value value);
 
+    /// Get an immutable span to the phi operands.
+    [[nodiscard]] std::span<const std::pair<Block*, Value>> PhiOperands() const noexcept;
+    /// Add phi operand to a phi instruction.
+    void AddPhiOperand(Block* predecessor, const Value& value);
+
     void Invalidate();
     void ClearArgs();
 
@@ -76,6 +85,7 @@ private:
     Inst* carry_inst{};
     Inst* overflow_inst{};
     Inst* zsco_inst{};
+    std::vector<std::pair<Block*, Value>> phi_operands;
     u64 flags{};
 };
 
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 371064bf3d..40759e96ab 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -5,6 +5,7 @@
 //     opcode name,                                         return type,    arg1 type,      arg2 type,      arg3 type,      arg4 type,      ...
 OPCODE(Void,                                                Void,                                                                           )
 OPCODE(Identity,                                            Opaque,         Opaque,                                                         )
+OPCODE(Phi,                                                 Opaque,         /*todo*/                                                        )
 
 // Control flow
 OPCODE(Branch,                                              Void,           Label,                                                          )
@@ -35,6 +36,13 @@ OPCODE(SetSFlag,                                            Void,           U1,
 OPCODE(SetCFlag,                                            Void,           U1,                                                             )
 OPCODE(SetOFlag,                                            Void,           U1,                                                             )
 
+// Undefined
+OPCODE(Undef1,                                              U1,                                                                             )
+OPCODE(Undef8,                                              U8,                                                                             )
+OPCODE(Undef16,                                             U16,                                                                            )
+OPCODE(Undef32,                                             U32,                                                                            )
+OPCODE(Undef64,                                             U64,                                                                            )
+
 // Memory operations
 OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                            )
 OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                            )
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
index 37cc530068..daf23193f2 100644
--- a/src/shader_recompiler/frontend/ir/pred.h
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -10,6 +10,13 @@ namespace Shader::IR {
 
 enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT };
 
+constexpr size_t NUM_USER_PREDS = 6;
+constexpr size_t NUM_PREDS = 7;
+
+[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
+    return static_cast<size_t>(pred);
+}
+
 } // namespace Shader::IR
 
 template <>
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
index 316fc4be8d..771094eb9a 100644
--- a/src/shader_recompiler/frontend/ir/reg.h
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -271,6 +271,9 @@ enum class Reg : u64 {
 };
 static_assert(static_cast<int>(Reg::RZ) == 255);
 
+constexpr size_t NUM_USER_REGS = 255;
+constexpr size_t NUM_REGS = 256;
+
 [[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
     if (reg == Reg::RZ) {
         // Adding or subtracting registers from RZ yields RZ
@@ -290,8 +293,12 @@ static_assert(static_cast<int>(Reg::RZ) == 255);
     return reg + (-num);
 }
 
+[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
+    return static_cast<size_t>(reg);
+}
+
 [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
-    return (static_cast<size_t>(reg) / align) * align == static_cast<size_t>(reg);
+    return (RegIndex(reg) / align) * align == RegIndex(reg);
 }
 
 } // namespace Shader::IR
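
Expressed through RegIndex, the alignment check is just divisibility of the
register index. Since both helpers are constexpr, the behavior can be pinned
down at compile time; a small illustrative check, assuming reg.h is included:

    // R4 sits on a 2- and a 4-register boundary, R6 only on a 2-register one.
    static_assert(Shader::IR::IsAligned(Shader::IR::Reg::R4, 4));
    static_assert(Shader::IR::IsAligned(Shader::IR::Reg::R6, 2));
    static_assert(!Shader::IR::IsAligned(Shader::IR::Reg::R6, 4));
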
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 7b5b35d6c5..1e974e88c7 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -115,6 +115,43 @@ u64 Value::U64() const {
     return imm_u64;
 }
 
+bool Value::operator==(const Value& other) const {
+    if (type != other.type) {
+        return false;
+    }
+    switch (type) {
+    case Type::Void:
+        return true;
+    case Type::Opaque:
+        return inst == other.inst;
+    case Type::Label:
+        return label == other.label;
+    case Type::Reg:
+        return reg == other.reg;
+    case Type::Pred:
+        return pred == other.pred;
+    case Type::Attribute:
+        return attribute == other.attribute;
+    case Type::U1:
+        return imm_u1 == other.imm_u1;
+    case Type::U8:
+        return imm_u8 == other.imm_u8;
+    case Type::U16:
+        return imm_u16 == other.imm_u16;
+    case Type::U32:
+        return imm_u32 == other.imm_u32;
+    case Type::U64:
+        return imm_u64 == other.imm_u64;
+    case Type::ZSCO:
+        throw NotImplementedException("ZSCO comparison");
+    }
+    throw LogicError("Invalid type {}", type);
+}
+
+bool Value::operator!=(const Value& other) const {
+    return !operator==(other);
+}
+
 void Value::ValidateAccess(IR::Type expected) const {
     if (type != expected) {
         throw LogicError("Reading {} out of {}", expected, type);
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 664dacf9d7..368119921b 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -48,6 +48,9 @@ public:
     [[nodiscard]] u32 U32() const;
     [[nodiscard]] u64 U64() const;
 
+    [[nodiscard]] bool operator==(const Value& other) const;
+    [[nodiscard]] bool operator!=(const Value& other) const;
+
 private:
     void ValidateAccess(IR::Type expected) const;
 
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index fc4dba8269..21ee981371 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -36,6 +36,7 @@ static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) {
             .cond{true},
             .branch_true{new_id},
             .branch_false{UNREACHABLE_BLOCK_ID},
+            .imm_predecessors{},
         },
         Block{
             .begin{pc},
@@ -46,6 +47,7 @@ static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) {
             .cond{block.cond},
             .branch_true{block.branch_true},
             .branch_false{block.branch_false},
+            .imm_predecessors{},
         },
     };
 }
@@ -108,7 +110,7 @@ static bool HasFlowTest(Opcode opcode) {
     }
 }
 
-static std::string Name(const Block& block) {
+static std::string NameOf(const Block& block) {
     if (block.begin.IsVirtual()) {
         return fmt::format("\"Virtual {}\"", block.id);
     } else {
@@ -154,13 +156,127 @@ bool Block::Contains(Location pc) const noexcept {
 }
 
 Function::Function(Location start_address)
-    : entrypoint{start_address}, labels{Label{
+    : entrypoint{start_address}, labels{{
                                      .address{start_address},
                                      .block_id{0},
                                      .stack{},
                                  }} {}
 
+void Function::BuildBlocksMap() {
+    const size_t num_blocks{NumBlocks()};
+    blocks_map.resize(num_blocks);
+    for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
+        Block& block{blocks_data[block_index]};
+        blocks_map[block.id] = &block;
+    }
+}
+
+void Function::BuildImmediatePredecessors() {
+    for (const Block& block : blocks_data) {
+        if (block.branch_true != UNREACHABLE_BLOCK_ID) {
+            blocks_map[block.branch_true]->imm_predecessors.push_back(block.id);
+        }
+        if (block.branch_false != UNREACHABLE_BLOCK_ID) {
+            blocks_map[block.branch_false]->imm_predecessors.push_back(block.id);
+        }
+    }
+}
+
+void Function::BuildPostOrder() {
+    boost::container::small_vector<BlockId, 0x110> block_stack;
+    post_order_map.resize(NumBlocks());
+
+    Block& first_block{blocks_data[blocks.front()]};
+    first_block.post_order_visited = true;
+    block_stack.push_back(first_block.id);
+
+    const auto visit_branch = [&](BlockId block_id, BlockId branch_id) {
+        if (branch_id == UNREACHABLE_BLOCK_ID) {
+            return false;
+        }
+        if (blocks_map[branch_id]->post_order_visited) {
+            return false;
+        }
+        blocks_map[branch_id]->post_order_visited = true;
+
+        // Calling push_back twice is faster than insert on MSVC
+        block_stack.push_back(block_id);
+        block_stack.push_back(branch_id);
+        return true;
+    };
+    while (!block_stack.empty()) {
+        const Block* const block{blocks_map[block_stack.back()]};
+        block_stack.pop_back();
+
+        if (!visit_branch(block->id, block->branch_true) &&
+            !visit_branch(block->id, block->branch_false)) {
+            post_order_map[block->id] = static_cast<u32>(post_order_blocks.size());
+            post_order_blocks.push_back(block->id);
+        }
+    }
+}
+
+void Function::BuildImmediateDominators() {
+    auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })};
+    auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id};
+    auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })};
+    auto intersect{[this](Block* finger1, Block* finger2) {
+        while (finger1 != finger2) {
+            while (post_order_map[finger1->id] < post_order_map[finger2->id]) {
+                finger1 = finger1->imm_dominator;
+            }
+            while (post_order_map[finger2->id] < post_order_map[finger1->id]) {
+                finger2 = finger2->imm_dominator;
+            }
+        }
+        return finger1;
+    }};
+    for (Block& block : blocks_data) {
+        block.imm_dominator = nullptr;
+    }
+    Block* const start_block{&blocks_data[blocks.front()]};
+    start_block->imm_dominator = start_block;
+
+    bool changed{true};
+    while (changed) {
+        changed = false;
+        for (Block* const block : post_order_blocks | reverse_order_but_first) {
+            Block* new_idom{};
+            for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) {
+                new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor;
+            }
+            changed |= block->imm_dominator != new_idom;
+            block->imm_dominator = new_idom;
+        }
+    }
+}
+
+void Function::BuildDominanceFrontier() {
+    auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })};
+    auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }};
+    for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) {
+        for (Block* current : block.imm_predecessors | transform_block_id) {
+            while (current != block.imm_dominator) {
+                current->dominance_frontiers.push_back(block.id);
+                current = current->imm_dominator;
+            }
+        }
+    }
+}
+
 CFG::CFG(Environment& env_, Location start_address) : env{env_} {
+    VisitFunctions(start_address);
+
+    for (Function& function : functions) {
+        function.BuildBlocksMap();
+        function.BuildImmediatePredecessors();
+        function.BuildPostOrder();
+        function.BuildImmediateDominators();
+        function.BuildDominanceFrontier();
+    }
+}
+
+void CFG::VisitFunctions(Location start_address) {
     functions.emplace_back(start_address);
     for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
         while (!functions[function_id].labels.empty()) {
@@ -202,6 +318,7 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
         .cond{true},
         .branch_true{UNREACHABLE_BLOCK_ID},
         .branch_false{UNREACHABLE_BLOCK_ID},
+        .imm_predecessors{},
     };
     // Analyze instructions until it reaches an already visited block or there's a branch
     bool is_branch{false};
@@ -310,7 +427,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati
         // Technically CAL pushes into PRET, but that's implicit in the function call for us
         // Insert the function into the list if it doesn't exist
         if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) {
-            functions.push_back(cal_pc);
+            functions.emplace_back(cal_pc);
         }
         // Handle CAL like a regular instruction
         break;
@@ -352,6 +469,7 @@ void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc,
         .cond{cond},
         .branch_true{conditional_block_id},
         .branch_false{UNREACHABLE_BLOCK_ID},
+        .imm_predecessors{},
     })};
     // Set the end properties of the conditional instruction and give it a new identity
     Block& conditional_block{block};
@@ -465,14 +583,14 @@ std::string CFG::Dot() const {
         dot += fmt::format("\t\tnode [style=filled];\n");
         for (const u32 block_index : function.blocks) {
             const Block& block{function.blocks_data[block_index]};
-            const std::string name{Name(block)};
+            const std::string name{NameOf(block)};
             const auto add_branch = [&](BlockId branch_id, bool add_label) {
                 const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)};
                 dot += fmt::format("\t\t{}->", name);
                 if (it == function.blocks_data.end()) {
                     dot += fmt::format("\"Unknown label {}\"", branch_id);
                 } else {
-                    dot += Name(*it);
+                    dot += NameOf(*it);
                 };
                 if (add_label && block.cond != true && block.cond != false) {
                     dot += fmt::format(" [label=\"{}\"]", block.cond);
@@ -520,7 +638,7 @@ std::string CFG::Dot() const {
         if (functions.front().blocks.empty()) {
             dot += "Start;\n";
         } else {
-            dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front()));
+            dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front()));
         }
         dot += fmt::format("\tStart [shape=diamond];\n");
     }
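
BuildImmediateDominators() above is the Cooper-Harvey-Kennedy dominance
algorithm: process every block but the entry in reverse post order, intersect
the already-computed dominator candidates of its predecessors, and repeat until
a fixed point. The intersect step walks both "fingers" up the dominator tree,
ordered by post-order number, until they meet. A standalone sketch over a toy
CFG given as predecessor lists (simplified types, not the yuzu containers):

    #include <vector>

    // 'order' holds block ids in post order; 'po_num[b]' is b's position in it.
    std::vector<int> BuildIdom(const std::vector<std::vector<int>>& preds,
                               const std::vector<int>& order,
                               const std::vector<int>& po_num, int entry) {
        std::vector<int> idom(preds.size(), -1);
        idom[entry] = entry; // the entry block dominates itself
        const auto intersect{[&](int a, int b) {
            while (a != b) {
                while (po_num[a] < po_num[b]) a = idom[a];
                while (po_num[b] < po_num[a]) b = idom[b];
            }
            return a;
        }};
        bool changed{true};
        while (changed) {
            changed = false;
            // Reverse post order, skipping the entry (reverse_order_but_first).
            for (auto it = order.rbegin(); it != order.rend(); ++it) {
                const int block{*it};
                if (block == entry) continue;
                int new_idom{-1};
                for (const int pred : preds[block]) {
                    if (idom[pred] < 0) continue; // the has_idom filter
                    new_idom = new_idom < 0 ? pred : intersect(pred, new_idom);
                }
                changed |= idom[block] != new_idom;
                idom[block] = new_idom;
            }
        }
        return idom;
    }
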
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
index b2ab0cdc35..20ada8afd9 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -70,6 +70,12 @@ struct Block {
     IR::Condition cond;
     BlockId branch_true;
     BlockId branch_false;
+    boost::container::small_vector<BlockId, 4> imm_predecessors;
+    boost::container::small_vector<BlockId, 8> dominance_frontiers;
+    union {
+        bool post_order_visited{false};
+        Block* imm_dominator;
+    };
 };
 
 struct Label {
@@ -81,11 +87,30 @@ struct Label {
 struct Function {
     Function(Location start_address);
 
+    void BuildBlocksMap();
+
+    void BuildImmediatePredecessors();
+
+    void BuildPostOrder();
+
+    void BuildImmediateDominators();
+
+    void BuildDominanceFrontier();
+
+    [[nodiscard]] size_t NumBlocks() const noexcept {
+        return static_cast<size_t>(current_block_id) + 1;
+    }
+
     Location entrypoint;
     BlockId current_block_id{0};
     boost::container::small_vector<Label, 16> labels;
     boost::container::small_vector<u32, 0x130> blocks;
     boost::container::small_vector<Block, 0x130> blocks_data;
+    // Translates from BlockId to its block pointer
+    boost::container::small_vector<Block*, 0x130> blocks_map;
+
+    boost::container::small_vector<u32, 0x130> post_order_blocks;
+    boost::container::small_vector<BlockId, 0x130> post_order_map;
 };
 
 class CFG {
@@ -97,6 +122,12 @@ class CFG {
 public:
     explicit CFG(Environment& env, Location start_address);
 
+    CFG& operator=(const CFG&) = delete;
+    CFG(const CFG&) = delete;
+
+    CFG& operator=(CFG&&) = delete;
+    CFG(CFG&&) = delete;
+
     [[nodiscard]] std::string Dot() const;
 
     [[nodiscard]] std::span<const Function> Functions() const noexcept {
@@ -104,20 +135,22 @@ public:
     }
 
 private:
+    void VisitFunctions(Location start_address);
+
     void AnalyzeLabel(FunctionId function_id, Label& label);
 
     /// Inspect already visited blocks.
     /// Return true when the block has already been visited
-    [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
+    bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
 
-    [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc);
+    AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc);
 
     void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class,
                          IR::Condition cond);
 
     /// Return true when the branch instruction is confirmed to be a branch
-    [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc,
-                                     Instruction inst, Opcode opcode);
+    bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst,
+                       Opcode opcode);
 
     void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst,
                     bool is_absolute);
@@ -126,8 +159,7 @@ private:
     AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst);
 
     /// Return the branch target block id
-    [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc,
-                                   FunctionId function_id);
+    BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id);
 
     Environment& env;
     boost::container::small_vector<Function, 1> functions;
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 67a98ba57a..49d1f4bfb7 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -8,40 +8,53 @@
 #include "shader_recompiler/frontend/maxwell/program.h"
 #include "shader_recompiler/frontend/maxwell/termination_code.h"
 #include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Maxwell {
+namespace {
+void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function,
+                   std::span<IR::Block*> block_map, IR::Block* block_memory) {
+    const size_t num_blocks{cfg_function.blocks.size()};
+    function.blocks.reserve(num_blocks);
 
-Program::Function::~Function() {
-    std::ranges::for_each(blocks, &std::destroy_at<IR::Block>);
-}
-
-Program::Program(Environment& env, const Flow::CFG& cfg) {
-    std::vector<IR::Block*> block_map;
-    functions.reserve(cfg.Functions().size());
+    for (const Flow::BlockId block_id : cfg_function.blocks) {
+        const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
 
-    for (const Flow::Function& cfg_function : cfg.Functions()) {
-        Function& function{functions.emplace_back()};
+        function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block)));
+        block_map[flow_block.id] = function.blocks.back().get();
+        ++block_memory;
+    }
+}
 
-        const size_t num_blocks{cfg_function.blocks.size()};
-        IR::Block* block_memory{block_alloc_pool.allocate(num_blocks)};
-        function.blocks.reserve(num_blocks);
+void EmitTerminationInsts(const Flow::Function& cfg_function,
+                          std::span<IR::Block* const> block_map) {
+    for (const Flow::BlockId block_id : cfg_function.blocks) {
+        const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
+        EmitTerminationCode(flow_block, block_map);
+    }
+}
 
-        block_map.resize(cfg_function.blocks_data.size());
+void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function,
+                       IR::Block* block_memory) {
+    std::vector<IR::Block*> block_map;
+    block_map.resize(cfg_function.blocks_data.size());
 
-        // Visit the instructions of all blocks
-        for (const Flow::BlockId block_id : cfg_function.blocks) {
-            const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
+    TranslateCode(env, cfg_function, function, block_map, block_memory);
+    EmitTerminationInsts(cfg_function, block_map);
+}
+} // Anonymous namespace
 
-            IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))};
-            ++block_memory;
-            function.blocks.push_back(block);
-            block_map[flow_block.id] = block;
-        }
-        // Now that all blocks are defined, emit the termination instructions
-        for (const Flow::BlockId block_id : cfg_function.blocks) {
-            const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
-            EmitTerminationCode(flow_block, block_map);
-        }
+Program::Program(Environment& env, const Flow::CFG& cfg) {
+    functions.reserve(cfg.Functions().size());
+    for (const Flow::Function& cfg_function : cfg.Functions()) {
+        TranslateFunction(env, cfg_function, functions.emplace_back(),
+                          block_alloc_pool.allocate(cfg_function.blocks.size()));
+    }
+    std::ranges::for_each(functions, Optimization::SsaRewritePass);
+    for (IR::Function& function : functions) {
+        Optimization::Invoke(Optimization::DeadCodeEliminationPass, function);
+        Optimization::Invoke(Optimization::IdentityRemovalPass, function);
+        // Optimization::Invoke(Optimization::VerificationPass, function);
     }
 }
 
@@ -50,16 +63,16 @@ std::string DumpProgram(const Program& program) {
     std::map<const IR::Inst*, size_t> inst_to_index;
     std::map<const IR::Block*, size_t> block_to_index;
 
-    for (const Program::Function& function : program.functions) {
-        for (const IR::Block* const block : function.blocks) {
-            block_to_index.emplace(block, index);
+    for (const IR::Function& function : program.functions) {
+        for (const auto& block : function.blocks) {
+            block_to_index.emplace(block.get(), index);
             ++index;
         }
     }
     std::string ret;
-    for (const Program::Function& function : program.functions) {
+    for (const IR::Function& function : program.functions) {
         ret += fmt::format("Function\n");
-        for (const IR::Block* const block : function.blocks) {
+        for (const auto& block : function.blocks) {
             ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
         }
     }
diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h
index 7814b2c016..36e678a9ea 100644
--- a/src/shader_recompiler/frontend/maxwell/program.h
+++ b/src/shader_recompiler/frontend/maxwell/program.h
@@ -4,13 +4,16 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <vector>
 
+#include <boost/container/small_vector.hpp>
 #include <boost/pool/pool_alloc.hpp>
 
 #include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/function.h"
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 
 namespace Shader::Maxwell {
@@ -22,16 +25,10 @@ public:
     explicit Program(Environment& env, const Flow::CFG& cfg);
 
 private:
-    struct Function {
-        ~Function();
-
-        std::vector<IR::Block*> blocks;
-    };
-
     boost::pool_allocator<IR::Block, boost::default_user_allocator_new_delete,
                           boost::details::pool::null_mutex>
         block_alloc_pool;
-    std::vector<Function> functions;
+    boost::container::small_vector<IR::Function, 1> functions;
 };
 
 [[nodiscard]] std::string DumpProgram(const Program& program);
diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp
index a4ea5c5e38..ed5137f20c 100644
--- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp
+++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp
@@ -47,12 +47,19 @@ static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) {
 
 static void EmitBranch(const Flow::Block& flow_block, std::span<IR::Block* const> block_map,
                        IR::IREmitter& ir) {
+    const auto add_immediate_predecessor = [&](Flow::BlockId label) {
+        block_map[label]->AddImmediatePredecessor(&ir.block);
+    };
     if (flow_block.cond == true) {
+        add_immediate_predecessor(flow_block.branch_true);
         return ir.Branch(block_map[flow_block.branch_true]);
     }
     if (flow_block.cond == false) {
+        add_immediate_predecessor(flow_block.branch_false);
         return ir.Branch(block_map[flow_block.branch_false]);
     }
+    add_immediate_predecessor(flow_block.branch_true);
+    add_immediate_predecessor(flow_block.branch_false);
     return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true],
                                 block_map[flow_block.branch_false]);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h
index b0d667942e..04e0445340 100644
--- a/src/shader_recompiler/frontend/maxwell/termination_code.h
+++ b/src/shader_recompiler/frontend/maxwell/termination_code.h
@@ -11,6 +11,7 @@
 
 namespace Shader::Maxwell {
 
+/// Emit termination instructions and collect immediate predecessors
 void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map);
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index bc607b0025..8be7d6ff1b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -208,7 +208,7 @@ public:
     void P2R_reg(u64 insn);
     void P2R_cbuf(u64 insn);
     void P2R_imm(u64 insn);
-    void PBK(u64 insn);
+    void PBK();
     void PCNT(u64 insn);
     void PEXIT(u64 insn);
     void PIXLD(u64 insn);
@@ -252,7 +252,7 @@ public:
     void SHR_reg(u64 insn);
     void SHR_cbuf(u64 insn);
     void SHR_imm(u64 insn);
-    void SSY(u64 insn);
+    void SSY();
     void ST(u64 insn);
     void STG(u64 insn);
     void STL(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index c907c1ffb5..0f52696d10 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -762,7 +762,7 @@ void TranslatorVisitor::P2R_imm(u64) {
     ThrowNotImplemented(Opcode::P2R_imm);
 }
 
-void TranslatorVisitor::PBK(u64) {
+void TranslatorVisitor::PBK() {
     // PBK is a no-op
 }
 
@@ -938,8 +938,8 @@ void TranslatorVisitor::SHR_imm(u64) {
     ThrowNotImplemented(Opcode::SHR_imm);
 }
 
-void TranslatorVisitor::SSY(u64) {
-    ThrowNotImplemented(Opcode::SSY);
+void TranslatorVisitor::SSY() {
+    // SSY is a no-op
 }
 
 void TranslatorVisitor::ST(u64) {
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index f9bb063fb1..7f85000878 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -28,7 +28,6 @@ void IdentityRemovalPass(IR::Block& block) {
             ++inst;
         }
     }
-
     for (IR::Inst* const inst : to_invalidate) {
         inst->Invalidate();
     }
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index fe5454e9a6..83f094d735 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -5,12 +5,21 @@
 #pragma once
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/function.h"
 
 namespace Shader::Optimization {
 
+template <typename Func>
+void Invoke(Func&& func, IR::Function& function) {
+    for (const auto& block : function.blocks) {
+        func(*block);
+    }
+}
+
 void DeadCodeEliminationPass(IR::Block& block);
 void GetSetElimination(IR::Block& block);
 void IdentityRemovalPass(IR::Block& block);
+void SsaRewritePass(IR::Function& function);
 void VerificationPass(const IR::Block& block);
 
 } // namespace Shader::Optimization
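
Invoke() is the adapter that keeps the existing per-block passes usable now
that a Program holds whole IR::Functions: it simply applies a block pass to
every block the function owns, so only SsaRewritePass needs function-wide
scope. A self-contained sketch of the shape of this helper, with toy stand-ins
for the IR types:

    #include <memory>
    #include <vector>

    struct Block { int id; };
    struct Function { std::vector<std::unique_ptr<Block>> blocks; };

    template <typename Func>
    void Invoke(Func&& func, Function& function) {
        for (const auto& block : function.blocks) {
            func(*block); // hand each owned block to the per-block pass
        }
    }

    int main() {
        Function function;
        function.blocks.push_back(std::make_unique<Block>(Block{0}));
        function.blocks.push_back(std::make_unique<Block>(Block{1}));
        int visited{};
        Invoke([&](Block&) { ++visited; }, function); // lambdas are passes too
        return visited == 2 ? 0 : 1;
    }
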
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
new file mode 100644
index 0000000000..a4b256a40e
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -0,0 +1,155 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file implements the SSA rewriting algorithm proposed in
+//
+//      Simple and Efficient Construction of Static Single Assignment Form.
+//      Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
+//      In: Jhala R., De Bosschere K. (eds)
+//      Compiler Construction. CC 2013.
+//      Lecture Notes in Computer Science, vol 7791.
+//      Springer, Berlin, Heidelberg
+//
+//      https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
+//
+
+#include <map>
+
+#include <boost/container/flat_map.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/function.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/opcode.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>;
+
+struct DefTable {
+    [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept {
+        return regs[IR::RegIndex(variable)];
+    }
+
+    [[nodiscard]] ValueMap& operator[](IR::Pred variable) noexcept {
+        return preds[IR::PredIndex(variable)];
+    }
+
+    std::array<ValueMap, IR::NUM_USER_REGS> regs;
+    std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+};
+
+IR::Opcode UndefOpcode(IR::Reg) noexcept {
+    return IR::Opcode::Undef32;
+}
+
+IR::Opcode UndefOpcode(IR::Pred) noexcept {
+    return IR::Opcode::Undef1;
+}
+
+[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
+    return inst.Opcode() == IR::Opcode::Phi;
+}
+
+class Pass {
+public:
+    void WriteVariable(auto variable, IR::Block* block, const IR::Value& value) {
+        current_def[variable].insert_or_assign(block, value);
+    }
+
+    IR::Value ReadVariable(auto variable, IR::Block* block) {
+        auto& def{current_def[variable]};
+        if (const auto it{def.find(block)}; it != def.end()) {
+            return it->second;
+        }
+        return ReadVariableRecursive(variable, block);
+    }
+
+private:
+    IR::Value ReadVariableRecursive(auto variable, IR::Block* block) {
+        IR::Value val;
+        if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) {
+            val = ReadVariable(variable, preds.front());
+        } else {
+            // Break potential cycles with operandless phi
+            val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+            WriteVariable(variable, block, val);
+            val = AddPhiOperands(variable, val, block);
+        }
+        WriteVariable(variable, block, val);
+        return val;
+    }
+
+    IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) {
+        for (IR::Block* const pred : block->ImmediatePredecessors()) {
+            phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred));
+        }
+        return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
+    }
+
+    IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) {
+        IR::Value same;
+        for (const auto& pair : phi.Inst()->PhiOperands()) {
+            const IR::Value& op{pair.second};
+            if (op == same || op == phi) {
+                // Unique value or self-reference
+                continue;
+            }
+            if (!same.IsEmpty()) {
+                // The phi merges at least two values: not trivial
+                return phi;
+            }
+            same = op;
+        }
+        if (same.IsEmpty()) {
+            // The phi is unreachable or in the start block
+            const auto first_not_phi{std::ranges::find_if_not(block->Instructions(), IsPhi)};
+            same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)};
+        }
+        // Reroute all uses of phi to same and remove phi
+        phi.Inst()->ReplaceUsesWith(same);
+        // TODO: Try to recursively remove all phi users, which might have become trivial
+        return same;
+    }
+
+    DefTable current_def;
+};
+} // Anonymous namespace
+
+void SsaRewritePass(IR::Function& function) {
+    Pass pass;
+    for (const auto& block : function.blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            switch (inst.Opcode()) {
+            case IR::Opcode::SetRegister:
+                if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
+                    pass.WriteVariable(reg, block.get(), inst.Arg(1));
+                }
+                break;
+            case IR::Opcode::SetPred:
+                if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
+                    pass.WriteVariable(pred, block.get(), inst.Arg(1));
+                }
+                break;
+            case IR::Opcode::GetRegister:
+                if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
+                    inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get()));
+                }
+                break;
+            case IR::Opcode::GetPred:
+                if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
+                    inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get()));
+                }
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
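
The pass is a close transcription of Braun et al.: writes record a definition
per (variable, block); reads walk predecessors on demand, planting an
operandless phi first to break cycles; and once a phi's operands are known,
TryRemoveTrivialPhi() collapses it when every operand is either the same value
or the phi itself. The decision logic in isolation, as a runnable toy model
(hypothetical Value type, not the real IR):

    #include <optional>
    #include <vector>

    struct Value {
        int id{-1}; // -1 means empty, like IR::Value::IsEmpty()
        bool operator==(const Value&) const = default;
    };

    // Returns the unique non-self operand if the phi is trivial, else nullopt.
    std::optional<Value> TrivialPhiValue(Value phi, const std::vector<Value>& ops) {
        Value same;
        for (const Value& op : ops) {
            if (op == same || op == phi) {
                continue; // value seen before, or a self-reference
            }
            if (same.id != -1) {
                return std::nullopt; // merges two distinct values: not trivial
            }
            same = op;
        }
        return same; // empty result: unreachable phi or start block
    }

    int main() {
        const Value phi{10}, v{3}, w{4};
        // %10 = phi [ %3, A ], [ %10, B ]  ->  trivially %3
        const bool trivial{TrivialPhiValue(phi, {v, phi}) == std::optional{v}};
        // %10 = phi [ %3, A ], [ %4, B ]   ->  a genuine merge, kept
        const bool kept{!TrivialPhiValue(phi, {v, w}).has_value()};
        return trivial && kept ? 0 : 1;
    }
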
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index 39f0bf3336..e3c9ad6e8f 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -35,12 +35,12 @@ void RunDatabase() {
     ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) {
         map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str()));
     });
-    for (int i = 0; i < 1; ++i) {
+    for (int i = 0; i < 300; ++i) {
         for (auto& env : map) {
             // fmt::print(stdout, "Decoding {}\n", path.string());
             const Location start_address{0};
             auto cfg{std::make_unique<Flow::CFG>(*env, start_address)};
-            // fmt::print(stdout, "{}\n", cfg.Dot());
+            // fmt::print(stdout, "{}\n", cfg->Dot());
             // IR::Program program{env, cfg};
             // Optimize(program);
             // const std::string code{EmitGLASM(program)};
-- 
cgit v1.2.3-70-g09d2


From e81739493a0cacc1efe3295f9d287d5d31b1a989 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 5 Feb 2021 05:58:02 -0300
Subject: shader: Constant propagation and global memory to storage buffer

---
 src/shader_recompiler/CMakeLists.txt               |   2 +
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  20 +-
 src/shader_recompiler/frontend/ir/ir_emitter.h     |   5 +-
 .../frontend/ir/microinstruction.cpp               |  26 +-
 .../frontend/ir/microinstruction.h                 |   4 +-
 src/shader_recompiler/frontend/ir/opcode.inc       |  22 +-
 src/shader_recompiler/frontend/ir/type.cpp         |   2 +-
 src/shader_recompiler/frontend/ir/type.h           |   1 -
 src/shader_recompiler/frontend/ir/value.cpp        |  17 +-
 src/shader_recompiler/frontend/ir/value.h          |   1 -
 src/shader_recompiler/frontend/maxwell/program.cpp |   6 +-
 .../ir_opt/constant_propagation_pass.cpp           | 146 +++++++++
 .../global_memory_to_storage_buffer_pass.cpp       | 331 +++++++++++++++++++++
 .../ir_opt/identity_removal_pass.cpp               |  28 +-
 src/shader_recompiler/ir_opt/passes.h              |   6 +-
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |  56 +++-
 src/shader_recompiler/ir_opt/verification_pass.cpp |  42 +--
 17 files changed, 652 insertions(+), 63 deletions(-)
 create mode 100644 src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
 create mode 100644 src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index f5dd4d29ea..72d5f41d21 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -59,7 +59,9 @@ add_executable(shader_recompiler
     frontend/maxwell/translate/impl/move_special_register.cpp
     frontend/maxwell/translate/translate.cpp
     frontend/maxwell/translate/translate.h
+    ir_opt/constant_propagation_pass.cpp
     ir_opt/dead_code_elimination_pass.cpp
+    ir_opt/global_memory_to_storage_buffer_pass.cpp
     ir_opt/identity_removal_pass.cpp
     ir_opt/passes.h
     ir_opt/ssa_rewrite_pass.cpp
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 87b253c9ac..1c5ae0109b 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -504,6 +504,20 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
     }
 }
 
+U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
+    if (a.Type() != b.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::ISub32, a, b);
+    case Type::U64:
+        return Inst<U64>(Opcode::ISub64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
 U32 IREmitter::IMul(const U32& a, const U32& b) {
     return Inst<U32>(Opcode::IMul32, a, b);
 }
@@ -679,8 +693,8 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& v
     }
 }
 
-U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) {
-    switch (bitsize) {
+U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
+    switch (result_bitsize) {
     case 32:
         switch (value.Type()) {
         case Type::U32:
@@ -703,7 +717,7 @@ U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) {
             break;
         }
     }
-    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), bitsize);
+    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
 } // namespace Shader::IR
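
ISub follows the same width-dispatch convention as IAdd: a single front-end entry point picks the 32- or 64-bit opcode and rejects mismatched operand widths before any instruction is emitted. A standalone sketch of that guard (toy types, not the project's headers):

    #include <cstdint>
    #include <stdexcept>

    enum class Type { U32, U64 };
    enum class Opcode { ISub32, ISub64 };

    struct Value {
        Type type;
    };

    // Pick the width-specific opcode and reject mixed-width operands up
    // front, mirroring the shape of IREmitter::ISub in the hunk above.
    Opcode SelectISub(const Value& a, const Value& b) {
        if (a.type != b.type) {
            throw std::invalid_argument("Mismatching types");
        }
        return a.type == Type::U32 ? Opcode::ISub32 : Opcode::ISub64;
    }

    int main() {
        const Value a{Type::U64};
        const Value b{Type::U64};
        return SelectISub(a, b) == Opcode::ISub64 ? 0 : 1;
    }
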
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 7ff763ecf4..84b844898f 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -17,6 +17,8 @@ namespace Shader::IR {
 class IREmitter {
 public:
     explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {}
+    explicit IREmitter(Block& block_, Block::iterator insertion_point_)
+        : block{block_}, insertion_point{insertion_point_} {}
 
     Block& block;
 
@@ -125,6 +127,7 @@ public:
     [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
 
     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+    [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
     [[nodiscard]] U32 INeg(const U32& value);
     [[nodiscard]] U32 IAbs(const U32& value);
@@ -155,7 +158,7 @@ public:
     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value);
     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value);
 
-    [[nodiscard]] U32U64 ConvertU(size_t bitsize, const U32U64& value);
+    [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);
 
 private:
     IR::Block::iterator insertion_point;
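
The new two-argument constructor is what lets an optimization pass emit code at an arbitrary point instead of appending at the block's end; the storage buffer pass below uses it to build address math right before the instruction it is replacing. The underlying operation is ordinary insertion before a list iterator, sketched here with std::list standing in for the intrusive instruction list:

    #include <cassert>
    #include <iterator>
    #include <list>

    int main() {
        std::list<int> block{10, 20, 30};
        // "insertion_point" plays the role of the instruction being replaced;
        // inserting at the iterator places new work right before it.
        const auto insertion_point = std::next(block.begin(), 2); // points at 30
        block.insert(insertion_point, 25);                        // emit before it
        assert((block == std::list<int>{10, 20, 25, 30}));
    }
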
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index ecf76e23d8..de953838cc 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/type.h"
@@ -44,6 +46,13 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::WriteGlobal32:
     case Opcode::WriteGlobal64:
     case Opcode::WriteGlobal128:
+    case Opcode::WriteStorageU8:
+    case Opcode::WriteStorageS8:
+    case Opcode::WriteStorageU16:
+    case Opcode::WriteStorageS16:
+    case Opcode::WriteStorage32:
+    case Opcode::WriteStorage64:
+    case Opcode::WriteStorage128:
         return true;
     default:
         return false;
@@ -56,15 +65,19 @@ bool Inst::IsPseudoInstruction() const noexcept {
     case Opcode::GetSignFromOp:
     case Opcode::GetCarryFromOp:
     case Opcode::GetOverflowFromOp:
-    case Opcode::GetZSCOFromOp:
         return true;
     default:
         return false;
     }
 }
 
+bool Inst::AreAllArgsImmediates() const noexcept {
+    return std::all_of(args.begin(), args.begin() + NumArgs(),
+                       [](const IR::Value& value) { return value.IsImmediate(); });
+}
+
 bool Inst::HasAssociatedPseudoOperation() const noexcept {
-    return zero_inst || sign_inst || carry_inst || overflow_inst || zsco_inst;
+    return zero_inst || sign_inst || carry_inst || overflow_inst;
 }
 
 Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
@@ -82,9 +95,6 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
     case Opcode::GetOverflowFromOp:
         CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp);
         return overflow_inst;
-    case Opcode::GetZSCOFromOp:
-        CheckPseudoInstruction(zsco_inst, Opcode::GetZSCOFromOp);
-        return zsco_inst;
     default:
         throw InvalidArgument("{} is not a pseudo-instruction", opcode);
     }
@@ -176,9 +186,6 @@ void Inst::Use(const Value& value) {
     case Opcode::GetOverflowFromOp:
         SetPseudoInstruction(value.Inst()->overflow_inst, this);
         break;
-    case Opcode::GetZSCOFromOp:
-        SetPseudoInstruction(value.Inst()->zsco_inst, this);
-        break;
     default:
         break;
     }
@@ -200,9 +207,6 @@ void Inst::UndoUse(const Value& value) {
     case Opcode::GetOverflowFromOp:
         RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp);
         break;
-    case Opcode::GetZSCOFromOp:
-        RemovePseudoInstruction(value.Inst()->zsco_inst, Opcode::GetZSCOFromOp);
-        break;
     default:
         break;
     }
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 61849695ac..22101c9e2d 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -49,6 +49,9 @@ public:
     /// Pseudo-instructions depend on their parent instructions for their semantics.
     [[nodiscard]] bool IsPseudoInstruction() const noexcept;
 
+    /// Determines if all arguments of this instruction are immediates.
+    [[nodiscard]] bool AreAllArgsImmediates() const noexcept;
+
     /// Determines if there is a pseudo-operation associated with this instruction.
     [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
     /// Gets a pseudo-operation associated with this instruction
@@ -94,7 +97,6 @@ private:
     Inst* sign_inst{};
     Inst* carry_inst{};
     Inst* overflow_inst{};
-    Inst* zsco_inst{};
     std::vector<std::pair<Block*, Value>> phi_operands;
     u64 flags{};
 };
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 4ecb5e936d..4596bf39f7 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -24,9 +24,6 @@ OPCODE(GetAttribute,                                        U32,            Attr
 OPCODE(SetAttribute,                                        U32,            Attribute,                                                      )
 OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                            )
 OPCODE(SetAttributeIndexed,                                 U32,            U32,                                                            )
-OPCODE(GetZSCORaw,                                          U32,                                                                            )
-OPCODE(SetZSCORaw,                                          Void,           U32,                                                            )
-OPCODE(SetZSCO,                                             Void,           ZSCO,                                                           )
 OPCODE(GetZFlag,                                            U1,             Void,                                                           )
 OPCODE(GetSFlag,                                            U1,             Void,                                                           )
 OPCODE(GetCFlag,                                            U1,             Void,                                                           )
@@ -65,6 +62,22 @@ OPCODE(WriteGlobal32,                                       Void,           U64,
 OPCODE(WriteGlobal64,                                       Void,           U64,            Opaque,                                         )
 OPCODE(WriteGlobal128,                                      Void,           U64,            Opaque,                                         )
 
+// Storage buffer operations
+OPCODE(LoadStorageU8,                                       U32,            U32,            U32,                                            )
+OPCODE(LoadStorageS8,                                       U32,            U32,            U32,                                            )
+OPCODE(LoadStorageU16,                                      U32,            U32,            U32,                                            )
+OPCODE(LoadStorageS16,                                      U32,            U32,            U32,                                            )
+OPCODE(LoadStorage32,                                       U32,            U32,            U32,                                            )
+OPCODE(LoadStorage64,                                       Opaque,         U32,            U32,                                            )
+OPCODE(LoadStorage128,                                      Opaque,         U32,            U32,                                            )
+OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorage64,                                      Void,           U32,            U32,            Opaque,                         )
+OPCODE(WriteStorage128,                                     Void,           U32,            U32,            Opaque,                         )
+
 // Vector utility
 OPCODE(CompositeConstruct2,                                 Opaque,         Opaque,         Opaque,                                         )
 OPCODE(CompositeConstruct3,                                 Opaque,         Opaque,         Opaque,         Opaque,                         )
@@ -90,7 +103,6 @@ OPCODE(GetZeroFromOp,                                       U1,             Opaq
 OPCODE(GetSignFromOp,                                       U1,             Opaque,                                                         )
 OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                         )
 OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                         )
-OPCODE(GetZSCOFromOp,                                       ZSCO,           Opaque,                                                         )
 
 // Floating-point operations
 OPCODE(FPAbs16,                                             U16,            U16,                                                            )
@@ -143,6 +155,8 @@ OPCODE(FPTrunc64,                                           U64,            U64,
 // Integer operations
 OPCODE(IAdd32,                                              U32,            U32,            U32,                                            )
 OPCODE(IAdd64,                                              U64,            U64,            U64,                                            )
+OPCODE(ISub32,                                              U32,            U32,            U32,                                            )
+OPCODE(ISub64,                                              U64,            U64,            U64,                                            )
 OPCODE(IMul32,                                              U32,            U32,            U32,                                            )
 OPCODE(INeg32,                                              U32,            U32,                                                            )
 OPCODE(IAbs32,                                              U32,            U32,                                                            )
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
index da1e2a0f6c..13cc091956 100644
--- a/src/shader_recompiler/frontend/ir/type.cpp
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -11,7 +11,7 @@ namespace Shader::IR {
 
 std::string NameOf(Type type) {
     static constexpr std::array names{
-        "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", "ZSCO",
+        "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64",
     };
     const size_t bits{static_cast<size_t>(type)};
     if (bits == 0) {
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
index f753628e85..397875018b 100644
--- a/src/shader_recompiler/frontend/ir/type.h
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -25,7 +25,6 @@ enum class Type {
     U16 = 1 << 7,
     U32 = 1 << 8,
     U64 = 1 << 9,
-    ZSCO = 1 << 10,
 };
 DECLARE_ENUM_FLAG_OPERATORS(Type)
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 1e974e88c7..59a9b10dc9 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -91,26 +91,41 @@ IR::Attribute Value::Attribute() const {
 }
 
 bool Value::U1() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U1();
+    }
     ValidateAccess(Type::U1);
     return imm_u1;
 }
 
 u8 Value::U8() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U8();
+    }
     ValidateAccess(Type::U8);
     return imm_u8;
 }
 
 u16 Value::U16() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U16();
+    }
     ValidateAccess(Type::U16);
     return imm_u16;
 }
 
 u32 Value::U32() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U32();
+    }
     ValidateAccess(Type::U32);
     return imm_u32;
 }
 
 u64 Value::U64() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U64();
+    }
     ValidateAccess(Type::U64);
     return imm_u64;
 }
@@ -142,8 +157,6 @@ bool Value::operator==(const Value& other) const {
         return imm_u32 == other.imm_u32;
     case Type::U64:
         return imm_u64 == other.imm_u64;
-    case Type::ZSCO:
-        throw NotImplementedException("ZSCO comparison");
     }
     throw LogicError("Invalid type {}", type);
 }
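
With these changes an immediate can be read through a chain of Identity instructions, so constant folding can query values before the identity-removal pass strips the aliases. A standalone sketch of the forwarding (toy Value type, illustrative only):

    #include <cassert>
    #include <cstdint>

    // Toy value that may alias another value through an "identity" link,
    // mirroring how Value::U32 now forwards to inst->Arg(0).U32().
    struct Value {
        const Value* identity{}; // non-null when this value is an identity
        std::uint32_t imm_u32{};

        std::uint32_t U32() const {
            return identity ? identity->U32() : imm_u32;
        }
    };

    int main() {
        const Value imm{.identity = nullptr, .imm_u32 = 42};
        const Value alias{.identity = &imm};
        assert(alias.U32() == 42); // reads through the identity chain
    }
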
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 368119921b..31f8317940 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -96,6 +96,5 @@ using U64 = TypedValue<Type::U64>;
 using U32U64 = TypedValue<Type::U32 | Type::U64>;
 using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
 using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
-using ZSCO = TypedValue<Type::ZSCO>;
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index bd1f96c079..b3f2de852f 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -52,9 +52,11 @@ Program::Program(Environment& env, const Flow::CFG& cfg) {
     }
     std::ranges::for_each(functions, Optimization::SsaRewritePass);
     for (IR::Function& function : functions) {
+        Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function);
+        Optimization::Invoke(Optimization::ConstantPropagationPass, function);
         Optimization::Invoke(Optimization::DeadCodeEliminationPass, function);
-        Optimization::Invoke(Optimization::IdentityRemovalPass, function);
-        // Optimization::Invoke(Optimization::VerificationPass, function);
+        Optimization::IdentityRemovalPass(function);
+        Optimization::VerificationPass(function);
     }
     //*/
 }
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 0000000000..02f5b653d4
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,146 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <type_traits>
+
+#include "common/bit_util.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) {
+    if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) {
+        throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count);
+    }
+    // Compute in 64 bits so that shift == 32 or count == 32 remains well defined
+    return static_cast<u32>((u64{base} >> shift) & ((u64{1} << count) - 1));
+}
+
+template <typename T>
+[[nodiscard]] T Arg(const IR::Value& value) {
+    if constexpr (std::is_same_v<T, bool>) {
+        return value.U1();
+    } else if constexpr (std::is_same_v<T, u32>) {
+        return value.U32();
+    } else if constexpr (std::is_same_v<T, u64>) {
+        return value.U64();
+    }
+}
+
+template <typename ImmFn>
+bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
+    const auto arg = [](const IR::Value& value) {
+        if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) {
+            return value.U1();
+        } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) {
+            return value.U32();
+        } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) {
+            return value.U64();
+        }
+    };
+    const IR::Value lhs{inst.Arg(0)};
+    const IR::Value rhs{inst.Arg(1)};
+
+    const bool is_lhs_immediate{lhs.IsImmediate()};
+    const bool is_rhs_immediate{rhs.IsImmediate()};
+
+    if (is_lhs_immediate && is_rhs_immediate) {
+        const auto result{imm_fn(arg(lhs), arg(rhs))};
+        inst.ReplaceUsesWith(IR::Value{result});
+        return false;
+    }
+    if (is_lhs_immediate && !is_rhs_immediate) {
+        IR::Inst* const rhs_inst{rhs.InstRecursive()};
+        if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
+            const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))};
+            inst.SetArg(0, rhs_inst->Arg(0));
+            inst.SetArg(1, IR::Value{combined});
+        } else {
+            // Normalize
+            inst.SetArg(0, rhs);
+            inst.SetArg(1, lhs);
+        }
+    }
+    if (!is_lhs_immediate && is_rhs_immediate) {
+        const IR::Inst* const lhs_inst{lhs.InstRecursive()};
+        if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
+            const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))};
+            inst.SetArg(0, lhs_inst->Arg(0));
+            inst.SetArg(1, IR::Value{combined});
+        }
+    }
+    return true;
+}
+
+void FoldGetRegister(IR::Inst& inst) {
+    if (inst.Arg(0).Reg() == IR::Reg::RZ) {
+        inst.ReplaceUsesWith(IR::Value{u32{0}});
+    }
+}
+
+void FoldGetPred(IR::Inst& inst) {
+    if (inst.Arg(0).Pred() == IR::Pred::PT) {
+        inst.ReplaceUsesWith(IR::Value{true});
+    }
+}
+
+template <typename T>
+void FoldAdd(IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        return;
+    }
+    if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
+        inst.ReplaceUsesWith(inst.Arg(0));
+    }
+}
+
+void FoldLogicalAnd(IR::Inst& inst) {
+    if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate()) {
+        if (rhs.U1()) {
+            inst.ReplaceUsesWith(inst.Arg(0));
+        } else {
+            inst.ReplaceUsesWith(IR::Value{false});
+        }
+    }
+}
+
+void ConstantPropagation(IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetRegister:
+        return FoldGetRegister(inst);
+    case IR::Opcode::GetPred:
+        return FoldGetPred(inst);
+    case IR::Opcode::IAdd32:
+        return FoldAdd<u32>(inst);
+    case IR::Opcode::IAdd64:
+        return FoldAdd<u64>(inst);
+    case IR::Opcode::BitFieldUExtract:
+        if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) {
+            inst.ReplaceUsesWith(IR::Value{
+                BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())});
+        }
+        break;
+    case IR::Opcode::LogicalAnd:
+        return FoldLogicalAnd(inst);
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void ConstantPropagationPass(IR::Block& block) {
+    std::ranges::for_each(block, ConstantPropagation);
+}
+
+} // namespace Shader::Optimization
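
FoldCommutative above both folds and canonicalizes: two immediate operands collapse to a constant (returning false so callers stop), a lone immediate is normalized to the right-hand side, and an immediate feeding a matching producer is combined with it. A standalone sketch of the first two behaviors over a toy node (assumed names, not the pass's API):

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <utility>

    // Toy binary node: either operand may be a known immediate.
    struct Node {
        std::optional<std::uint32_t> lhs_imm;
        std::optional<std::uint32_t> rhs_imm;
    };

    // Fold fully-immediate additions to a constant; otherwise normalize so
    // any immediate sits on the right-hand side, as FoldCommutative does.
    std::optional<std::uint32_t> FoldAddNode(Node& node) {
        if (node.lhs_imm && node.rhs_imm) {
            return *node.lhs_imm + *node.rhs_imm; // replace all uses with this
        }
        if (node.lhs_imm && !node.rhs_imm) {
            std::swap(node.lhs_imm, node.rhs_imm); // canonicalize: imm goes right
        }
        return std::nullopt;
    }

    int main() {
        Node constant{.lhs_imm = 2, .rhs_imm = 3};
        assert(FoldAddNode(constant) == 5u);

        Node mixed{.lhs_imm = 7, .rhs_imm = std::nullopt};
        assert(!FoldAddNode(mixed));
        assert(!mixed.lhs_imm && mixed.rhs_imm == 7u); // immediate normalized right
    }

Keeping immediates on the right is what makes the later pattern matches cheap: FoldAdd only has to test Arg(1), and TrackLowAddress in the next pass can assume an IAdd64's immediate is the second argument.
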
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 0000000000..ee69a5c9d9
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,331 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <compare>
+#include <optional>
+#include <ranges>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+/// Location in the constant buffers that holds a storage buffer descriptor
+struct StorageBufferAddr {
+    auto operator<=>(const StorageBufferAddr&) const noexcept = default;
+
+    u32 index;
+    u32 offset;
+};
+
+/// Block iterator to a global memory instruction and the storage buffer it uses
+struct StorageInst {
+    StorageBufferAddr storage_buffer;
+    IR::Block::iterator inst;
+};
+
+/// Bias towards a certain range of constant buffers when looking for storage buffers
+struct Bias {
+    u32 index;
+    u32 offset_begin;
+    u32 offset_end;
+};
+
+using StorageBufferSet =
+    boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
+                               boost::container::small_vector<StorageBufferAddr, 16>>;
+using StorageInstVector = boost::container::small_vector<StorageInst, 32>;
+
+/// Returns true when the instruction is a global memory instruction
+bool IsGlobalMemory(const IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/// Converts a global memory opcode to its storage buffer equivalent
+IR::Opcode GlobalToStorage(IR::Opcode opcode) {
+    switch (opcode) {
+    case IR::Opcode::LoadGlobalS8:
+        return IR::Opcode::LoadStorageS8;
+    case IR::Opcode::LoadGlobalU8:
+        return IR::Opcode::LoadStorageU8;
+    case IR::Opcode::LoadGlobalS16:
+        return IR::Opcode::LoadStorageS16;
+    case IR::Opcode::LoadGlobalU16:
+        return IR::Opcode::LoadStorageU16;
+    case IR::Opcode::LoadGlobal32:
+        return IR::Opcode::LoadStorage32;
+    case IR::Opcode::LoadGlobal64:
+        return IR::Opcode::LoadStorage64;
+    case IR::Opcode::LoadGlobal128:
+        return IR::Opcode::LoadStorage128;
+    case IR::Opcode::WriteGlobalS8:
+        return IR::Opcode::WriteStorageS8;
+    case IR::Opcode::WriteGlobalU8:
+        return IR::Opcode::WriteStorageU8;
+    case IR::Opcode::WriteGlobalS16:
+        return IR::Opcode::WriteStorageS16;
+    case IR::Opcode::WriteGlobalU16:
+        return IR::Opcode::WriteStorageU16;
+    case IR::Opcode::WriteGlobal32:
+        return IR::Opcode::WriteStorage32;
+    case IR::Opcode::WriteGlobal64:
+        return IR::Opcode::WriteStorage64;
+    case IR::Opcode::WriteGlobal128:
+        return IR::Opcode::WriteStorage128;
+    default:
+        throw InvalidArgument("Invalid global memory opcode {}", opcode);
+    }
+}
+
+/// Returns true when a storage buffer address satisfies a bias
+bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
+    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
+           storage_buffer.offset < bias.offset_end;
+}
+
+/// Ignores a global memory operation: loads return zero and writes are discarded
+void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
+    const IR::Value zero{u32{0}};
+    switch (inst->Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+        inst->ReplaceUsesWith(zero);
+        break;
+    case IR::Opcode::LoadGlobal64:
+        inst->ReplaceUsesWith(
+            IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})});
+        break;
+    case IR::Opcode::LoadGlobal128:
+        inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
+            inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})});
+        break;
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        inst->Invalidate();
+        break;
+    default:
+        throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode());
+    }
+}
+
+/// Recursively tries to track the storage buffer address used by a global memory instruction
+std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
+    if (value.IsImmediate()) {
+        // Immediates can't be a storage buffer
+        return std::nullopt;
+    }
+    const IR::Inst* const inst{value.InstRecursive()};
+    if (inst->Opcode() == IR::Opcode::GetCbuf) {
+        const IR::Value index{inst->Arg(0)};
+        const IR::Value offset{inst->Arg(1)};
+        if (!index.IsImmediate()) {
+            // Definitely not a storage buffer if it's read from a non-immediate index
+            return std::nullopt;
+        }
+        if (!offset.IsImmediate()) {
+            // TODO: Support SSBO arrays
+            return std::nullopt;
+        }
+        const StorageBufferAddr storage_buffer{
+            .index = index.U32(),
+            .offset = offset.U32(),
+        };
+        if (bias && !MeetsBias(storage_buffer, *bias)) {
+            // Reject addresses that fall outside the bias, as they may be false positives
+            return std::nullopt;
+        }
+        return storage_buffer;
+    }
+    // Walk the arguments in reverse, which is more likely to find the address source first
+    for (size_t arg = inst->NumArgs(); arg--;) {
+        if (const std::optional storage_buffer{Track(inst->Arg(arg), bias)}) {
+            return *storage_buffer;
+        }
+    }
+    return std::nullopt;
+}
+
+/// Collects the storage buffer used by a global memory instruction and the instruction itself
+void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
+                           StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) {
+    // NVN puts storage buffers in a specific constant buffer range, so we bias towards these
+    // addresses to avoid false positives
+    static constexpr Bias nvn_bias{
+        .index{0},
+        .offset_begin{0x110},
+        .offset_end{0x610},
+    };
+    // First, try to find the storage buffer in the NVN address range
+    const IR::U64 addr{inst->Arg(0)};
+    std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)};
+    if (!storage_buffer) {
+        // If it fails, track without a bias
+        storage_buffer = Track(addr, nullptr);
+        if (!storage_buffer) {
+            // If that also failed, drop the global memory usage and skip the instruction,
+            // otherwise the unchecked optional access below would be undefined behavior
+            IgnoreGlobalMemory(block, inst);
+            return;
+        }
+    }
+    // Collect storage buffer and the instruction
+    storage_buffer_set.insert(*storage_buffer);
+    to_replace.push_back(StorageInst{
+        .storage_buffer{*storage_buffer},
+        .inst{inst},
+    });
+}
+
+/// Tries to track the low 32 bits of the address used by a global memory instruction
+std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) {
+    // The first argument is the 64-bit GPU address used by the global memory instruction
+    const IR::U64 addr{inst->Arg(0)};
+    if (addr.IsImmediate()) {
+        // Not much we can do if it's an immediate
+        return std::nullopt;
+    }
+    // This address is expected to either be a PackUint2x32 or a IAdd64
+    IR::Inst* addr_inst{addr.InstRecursive()};
+    s32 imm_offset{0};
+    if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
+        // If it's an IAdd64, get the immediate offset it applies and grab the address
+        // instruction. This expects the instruction to be canonicalized, with the address in
+        // the first argument and the immediate offset in the second.
+        const IR::U64 imm_offset_value{addr_inst->Arg(1)};
+        if (!imm_offset_value.IsImmediate()) {
+            return std::nullopt;
+        }
+        imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
+        const IR::U64 iadd_addr{addr_inst->Arg(0)};
+        if (iadd_addr.IsImmediate()) {
+            return std::nullopt;
+        }
+        addr_inst = iadd_addr.Inst();
+    }
+    // With any IAdd64 peeled off, the instruction is now required to be a PackUint2x32
+    if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
+        return std::nullopt;
+    }
+    // PackUint2x32 is expected to be generated from a vector
+    const IR::Value vector{addr_inst->Arg(0)};
+    if (vector.IsImmediate()) {
+        return std::nullopt;
+    }
+    // This vector is expected to be a CompositeConstruct2
+    IR::Inst* const vector_inst{vector.InstRecursive()};
+    if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) {
+        return std::nullopt;
+    }
+    // Grab the first argument from the CompositeConstruct2, this is the low address.
+    // Re-apply the offset in case we found one.
+    const IR::U32 low_addr{vector_inst->Arg(0)};
+    return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr;
+}
+
+/// Returns the storage buffer offset in bytes for an equivalent storage instruction
+IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) {
+    IR::IREmitter ir{block, inst};
+    IR::U32 offset;
+    if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) {
+        offset = *low_addr;
+    } else {
+        offset = ir.ConvertU(32, IR::U64{inst->Arg(0)});
+    }
+    // Subtract the storage buffer base address (its least significant 32 bits, read back from
+    // the constant buffer) from the guest address. The result is the offset in bytes.
+    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+    return ir.ISub(offset, low_cbuf);
+}
+
+/// Replace a global memory load instruction with its storage buffer equivalent
+void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+                 const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+    const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})};
+    inst->ReplaceUsesWith(value);
+}
+
+/// Replace a global memory write instruction with its storage buffer equivalent
+void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+                  const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+    block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)});
+    inst->Invalidate();
+}
+
+/// Replace a global memory instruction with its storage buffer equivalent
+void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+             const IR::U32& offset) {
+    switch (inst->Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+        return ReplaceLoad(block, inst, storage_index, offset);
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        return ReplaceWrite(block, inst, storage_index, offset);
+    default:
+        throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode());
+    }
+}
+} // Anonymous namespace
+
+void GlobalMemoryToStorageBufferPass(IR::Block& block) {
+    StorageBufferSet storage_buffers;
+    StorageInstVector to_replace;
+
+    for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) {
+        if (!IsGlobalMemory(*inst)) {
+            continue;
+        }
+        CollectStorageBuffers(block, inst, storage_buffers, to_replace);
+    }
+    for (const auto [storage_buffer, inst] : to_replace) {
+        const auto it{storage_buffers.find(storage_buffer)};
+        const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+        const IR::U32 offset{StorageOffset(block, inst, storage_buffer)};
+        Replace(block, inst, storage_index, offset);
+    }
+}
+
+} // namespace Shader::Optimization
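
The pass hinges on the address shape NVN shaders produce: a 64-bit pointer assembled with PackUint2x32(CompositeConstruct2(low, high)), optionally displaced by an IAdd64 immediate, where the low word comes from a constant buffer that also holds the storage buffer's base address. Recovering the byte offset is then plain 32-bit arithmetic; a standalone sketch with illustrative values:

    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint32_t ssbo_base = 0x00400000; // low 32 bits of the SSBO address held in the cbuf
        const std::uint32_t low = ssbo_base + 0x30; // low word of the packed pointer
        const std::uint32_t high = 0x000000ff;      // high word of the packed pointer
        const std::int32_t imm_offset = 0x10;       // immediate applied by an IAdd64

        // PackUint2x32(CompositeConstruct2(low, high)) + imm_offset
        const std::uint64_t addr = ((std::uint64_t{high} << 32) | low) + imm_offset;

        // TrackLowAddress recovers low + imm_offset; StorageOffset then
        // subtracts the base read back from the constant buffer.
        const std::uint32_t tracked_low = static_cast<std::uint32_t>(addr);
        const std::uint32_t byte_offset = tracked_low - ssbo_base;
        assert(byte_offset == 0x40);
    }

The flat_set keeps the discovered descriptors sorted and de-duplicated, and index_of converts the lookup iterator into the storage buffer index handed to the replacement opcodes.
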
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 7f85000878..39a9729191 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -10,22 +10,24 @@
 
 namespace Shader::Optimization {
 
-void IdentityRemovalPass(IR::Block& block) {
+void IdentityRemovalPass(IR::Function& function) {
     std::vector<IR::Inst*> to_invalidate;
 
-    for (auto inst = block.begin(); inst != block.end();) {
-        const size_t num_args{inst->NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            IR::Value arg;
-            while ((arg = inst->Arg(i)).IsIdentity()) {
-                inst->SetArg(i, arg.Inst()->Arg(0));
+    for (auto& block : function.blocks) {
+        for (auto inst = block->begin(); inst != block->end();) {
+            const size_t num_args{inst->NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                IR::Value arg;
+                while ((arg = inst->Arg(i)).IsIdentity()) {
+                    inst->SetArg(i, arg.Inst()->Arg(0));
+                }
+            }
+            if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+                to_invalidate.push_back(&*inst);
+                inst = block->Instructions().erase(inst);
+            } else {
+                ++inst;
             }
-        }
-        if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
-            to_invalidate.push_back(&*inst);
-            inst = block.Instructions().erase(inst);
-        } else {
-            ++inst;
         }
     }
     for (IR::Inst* const inst : to_invalidate) {
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 7ed4005ed1..578a24d89b 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,9 +16,11 @@ void Invoke(Func&& func, IR::Function& function) {
     }
 }
 
+void ConstantPropagationPass(IR::Block& block);
 void DeadCodeEliminationPass(IR::Block& block);
-void IdentityRemovalPass(IR::Block& block);
+void GlobalMemoryToStorageBufferPass(IR::Block& block);
+void IdentityRemovalPass(IR::Function& function);
 void SsaRewritePass(IR::Function& function);
-void VerificationPass(const IR::Block& block);
+void VerificationPass(const IR::Function& function);
 
 } // namespace Shader::Optimization
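
After this change the pass list deliberately mixes granularities: block-scoped passes are lifted over a whole function with Invoke, while SSA rewriting, identity removal, and verification take the function directly because they need cross-block state. A standalone sketch of the lifting adapter (toy Function/Block shapes):

    #include <cassert>
    #include <memory>
    #include <vector>

    struct Block {
        int insts{};
    };

    struct Function {
        std::vector<std::unique_ptr<Block>> blocks;
    };

    // Lift a per-block pass over every block of a function, like
    // Optimization::Invoke in the header above.
    template <typename Func>
    void Invoke(Func&& func, Function& function) {
        for (auto& block : function.blocks) {
            func(*block);
        }
    }

    int main() {
        Function function;
        function.blocks.push_back(std::make_unique<Block>(Block{3}));
        function.blocks.push_back(std::make_unique<Block>(Block{5}));
        int total = 0;
        Invoke([&](Block& block) { total += block.insts; }, function);
        assert(total == 8);
    }
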
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index a4b256a40e..3c9b020e08 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -14,8 +14,6 @@
 //      https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
 //
 
-#include <map>
-
 #include <boost/container/flat_map.hpp>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
@@ -30,6 +28,12 @@ namespace Shader::Optimization {
 namespace {
 using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>;
 
+struct FlagTag {};
+struct ZeroFlagTag : FlagTag {};
+struct SignFlagTag : FlagTag {};
+struct CarryFlagTag : FlagTag {};
+struct OverflowFlagTag : FlagTag {};
+
 struct DefTable {
     [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept {
         return regs[IR::RegIndex(variable)];
@@ -39,8 +43,28 @@ struct DefTable {
         return preds[IR::PredIndex(variable)];
     }
 
+    [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept {
+        return zero_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept {
+        return sign_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept {
+        return carry_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept {
+        return overflow_flag;
+    }
+
     std::array<ValueMap, IR::NUM_USER_REGS> regs;
     std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+    ValueMap zero_flag;
+    ValueMap sign_flag;
+    ValueMap carry_flag;
+    ValueMap overflow_flag;
 };
 
 IR::Opcode UndefOpcode(IR::Reg) noexcept {
@@ -51,6 +75,10 @@ IR::Opcode UndefOpcode(IR::Pred) noexcept {
     return IR::Opcode::Undef1;
 }
 
+IR::Opcode UndefOpcode(const FlagTag&) noexcept {
+    return IR::Opcode::Undef1;
+}
+
 [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
     return inst.Opcode() == IR::Opcode::Phi;
 }
@@ -135,6 +163,18 @@ void SsaRewritePass(IR::Function& function) {
                     pass.WriteVariable(pred, block.get(), inst.Arg(1));
                 }
                 break;
+            case IR::Opcode::SetZFlag:
+                pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetSFlag:
+                pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetCFlag:
+                pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetOFlag:
+                pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0));
+                break;
             case IR::Opcode::GetRegister:
                 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
                     inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get()));
@@ -145,6 +185,18 @@ void SsaRewritePass(IR::Function& function) {
                     inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get()));
                 }
                 break;
+            case IR::Opcode::GetZFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetSFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetCFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetOFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get()));
+                break;
             default:
                 break;
             }
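
The flag tags are empty types whose only purpose is overload selection: the same templated ReadVariable/WriteVariable machinery now works for registers, predicates, and the four condition-code flags without any runtime key. The idiom in isolation (standalone sketch, two flags shown):

    #include <cassert>

    struct ZeroFlagTag {};
    struct CarryFlagTag {};

    // Each overload routes its tag to a dedicated storage slot, mirroring
    // DefTable::operator[] in the hunk above.
    struct DefTable {
        int& operator[](ZeroFlagTag) noexcept { return zero_flag; }
        int& operator[](CarryFlagTag) noexcept { return carry_flag; }
        int zero_flag{};
        int carry_flag{};
    };

    // Generic code never names the flag: the argument's static type selects
    // the overload, exactly how WriteVariable(ZeroFlagTag{}, ...) resolves.
    template <typename Tag>
    void Write(DefTable& defs, Tag tag, int value) {
        defs[tag] = value;
    }

    int main() {
        DefTable defs;
        Write(defs, ZeroFlagTag{}, 1);
        Write(defs, CarryFlagTag{}, 2);
        assert(defs.zero_flag == 1 && defs.carry_flag == 2);
    }
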
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 36d9ae39b2..8a5adf5a23 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -11,40 +11,44 @@
 
 namespace Shader::Optimization {
 
-static void ValidateTypes(const IR::Block& block) {
-    for (const IR::Inst& inst : block) {
-        const size_t num_args{inst.NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            const IR::Type t1{inst.Arg(i).Type()};
-            const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
-            if (!IR::AreTypesCompatible(t1, t2)) {
-                throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block));
+static void ValidateTypes(const IR::Function& function) {
+    for (const auto& block : function.blocks) {
+        for (const IR::Inst& inst : *block) {
+            const size_t num_args{inst.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                const IR::Type t1{inst.Arg(i).Type()};
+                const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
+                if (!IR::AreTypesCompatible(t1, t2)) {
+                    throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
+                }
             }
         }
     }
 }
 
-static void ValidateUses(const IR::Block& block) {
+static void ValidateUses(const IR::Function& function) {
     std::map<IR::Inst*, int> actual_uses;
-    for (const IR::Inst& inst : block) {
-        const size_t num_args{inst.NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            const IR::Value arg{inst.Arg(i)};
-            if (!arg.IsImmediate()) {
-                ++actual_uses[arg.Inst()];
+    for (const auto& block : function.blocks) {
+        for (const IR::Inst& inst : *block) {
+            const size_t num_args{inst.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                const IR::Value arg{inst.Arg(i)};
+                if (!arg.IsImmediate()) {
+                    ++actual_uses[arg.Inst()];
+                }
             }
         }
     }
     for (const auto [inst, uses] : actual_uses) {
         if (inst->UseCount() != uses) {
-            throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block));
+            throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/);
         }
     }
 }
 
-void VerificationPass(const IR::Block& block) {
-    ValidateTypes(block);
-    ValidateUses(block);
+void VerificationPass(const IR::Function& function) {
+    ValidateTypes(function);
+    ValidateUses(function);
 }
 
 } // namespace Shader::Optimization
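
ValidateUses recomputes every operand edge from scratch and checks it against the use_count cached on each instruction, so any pass that forgets to pair Use with UndoUse is caught here rather than corrupting later passes. The same invariant on a toy DAG (standalone sketch):

    #include <cassert>
    #include <map>
    #include <vector>

    struct Inst {
        std::vector<Inst*> args;
        int use_count{};
    };

    // Recount every operand edge and compare against the cached counters,
    // as ValidateUses does across a whole function.
    bool UsesAreConsistent(const std::vector<Inst*>& insts) {
        std::map<const Inst*, int> actual_uses;
        for (const Inst* inst : insts) {
            for (const Inst* arg : inst->args) {
                ++actual_uses[arg];
            }
        }
        for (const Inst* inst : insts) {
            if (inst->use_count != actual_uses[inst]) {
                return false;
            }
        }
        return true;
    }

    int main() {
        Inst a{};
        a.use_count = 1; // one use: b's operand below
        Inst b{.args = {&a}};
        assert(UsesAreConsistent({&a, &b}));
        a.use_count = 2; // a desynchronized counter must be caught
        assert(!UsesAreConsistent({&a, &b}));
    }
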
-- 
cgit v1.2.3-70-g09d2


From da8096e6e35af250dcc56a1af76b8a211df63a90 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 6 Feb 2021 02:38:22 -0300
Subject: shader: Properly store phi on Inst

---
 src/shader_recompiler/frontend/ir/basic_block.cpp  |  33 +++----
 .../frontend/ir/microinstruction.cpp               | 102 +++++++++++++++------
 .../frontend/ir/microinstruction.h                 |  37 +++++---
 src/shader_recompiler/frontend/ir/opcodes.inc      |   2 +-
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |  22 +++--
 src/shader_recompiler/object_pool.h                |  11 +--
 6 files changed, 132 insertions(+), 75 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index 1a5d821357..50c6a83cd0 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -129,26 +129,21 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
         } else {
             ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
         }
-        if (op == Opcode::Phi) {
-            size_t val_index{0};
-            for (const auto& [phi_block, phi_val] : inst.PhiOperands()) {
-                ret += val_index != 0 ? ", " : " ";
-                ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val),
-                                   BlockToIndex(block_to_index, phi_block));
-                ++val_index;
+        const size_t arg_count{inst.NumArgs()};
+        for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
+            const Value arg{inst.Arg(arg_index)};
+            const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)};
+            ret += arg_index != 0 ? ", " : " ";
+            if (op == Opcode::Phi) {
+                ret += fmt::format("[ {}, {} ]", arg_str,
+                                   BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
+            } else {
+                ret += arg_str;
             }
-        } else {
-            const size_t arg_count{NumArgsOf(op)};
-            for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
-                const Value arg{inst.Arg(arg_index)};
-                ret += arg_index != 0 ? ", " : " ";
-                ret += ArgToIndex(block_to_index, inst_to_index, arg);
-
-                const Type actual_type{arg.Type()};
-                const Type expected_type{ArgTypeOf(op, arg_index)};
-                if (!AreTypesCompatible(actual_type, expected_type)) {
-                    ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
-                }
+            const Type actual_type{arg.Type()};
+            const Type expected_type{ArgTypeOf(op, arg_index)};
+            if (!AreTypesCompatible(actual_type, expected_type)) {
+                ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
             }
         }
         if (TypeOf(op) != Type::Void) {
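
For reference, a phi that merges %10 from block $1 with %20 from block $2 would render along these lines (illustrative; the exact value and block spellings come from ArgToIndex and BlockToIndex):

    %21 = Phi [ %10, {Block $1} ], [ %20, {Block $2} ]
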
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index de953838cc..e7ca92039e 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <memory>
 
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
@@ -30,6 +31,22 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode)
     inst = nullptr;
 }
 
+Inst::Inst(IR::Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {
+    if (op == Opcode::Phi) {
+        std::construct_at(&phi_args);
+    } else {
+        std::construct_at(&args);
+    }
+}
+
+Inst::~Inst() {
+    if (op == Opcode::Phi) {
+        std::destroy_at(&phi_args);
+    } else {
+        std::destroy_at(&args);
+    }
+}
+
 bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
     case Opcode::Branch:
@@ -71,7 +88,10 @@ bool Inst::IsPseudoInstruction() const noexcept {
     }
 }
 
-bool Inst::AreAllArgsImmediates() const noexcept {
+bool Inst::AreAllArgsImmediates() const {
+    if (op == Opcode::Phi) {
+        throw LogicError("Testing for all arguments are immediates on phi instruction");
+    }
     return std::all_of(args.begin(), args.begin() + NumArgs(),
                        [](const IR::Value& value) { return value.IsImmediate(); });
 }
@@ -101,7 +121,7 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
 }
 
 size_t Inst::NumArgs() const {
-    return NumArgsOf(op);
+    return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op);
 }
 
 IR::Type Inst::Type() const {
@@ -109,13 +129,23 @@ IR::Type Inst::Type() const {
 }
 
 Value Inst::Arg(size_t index) const {
-    if (index >= NumArgsOf(op)) {
-        throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
+    if (op == Opcode::Phi) {
+        if (index >= phi_args.size()) {
+            throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
+        }
+        return phi_args[index].second;
+    } else {
+        if (index >= NumArgsOf(op)) {
+            throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
+        }
+        return args[index];
     }
-    return args[index];
 }
 
 void Inst::SetArg(size_t index, Value value) {
+    if (op == Opcode::Phi) {
+        throw LogicError("Setting argument on a phi instruction");
+    }
     if (index >= NumArgsOf(op)) {
         throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
     }
@@ -128,15 +158,21 @@ void Inst::SetArg(size_t index, Value value) {
     args[index] = value;
 }
 
-std::span<const std::pair<Block*, Value>> Inst::PhiOperands() const noexcept {
-    return phi_operands;
+Block* Inst::PhiBlock(size_t index) const {
+    if (op != Opcode::Phi) {
+        throw LogicError("{} is not a Phi instruction", op);
+    }
+    if (index >= phi_args.size()) {
+        throw InvalidArgument("Out of bounds argument index {} in phi instruction");
+    }
+    return phi_args[index].first;
 }
 
 void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
     if (!value.IsImmediate()) {
         Use(value);
     }
-    phi_operands.emplace_back(predecessor, value);
+    phi_args.emplace_back(predecessor, value);
 }
 
 void Inst::Invalidate() {
@@ -145,18 +181,22 @@ void Inst::Invalidate() {
 }
 
 void Inst::ClearArgs() {
-    for (auto& value : args) {
-        if (!value.IsImmediate()) {
-            UndoUse(value);
+    if (op == Opcode::Phi) {
+        for (auto& pair : phi_args) {
+            IR::Value& value{pair.second};
+            if (!value.IsImmediate()) {
+                UndoUse(value);
+            }
         }
-        value = {};
-    }
-    for (auto& [phi_block, phi_op] : phi_operands) {
-        if (!phi_op.IsImmediate()) {
-            UndoUse(phi_op);
+        phi_args.clear();
+    } else {
+        for (auto& value : args) {
+            if (!value.IsImmediate()) {
+                UndoUse(value);
+            }
+            value = {};
         }
     }
-    phi_operands.clear();
 }
 
 void Inst::ReplaceUsesWith(Value replacement) {
@@ -167,24 +207,29 @@ void Inst::ReplaceUsesWith(Value replacement) {
     if (!replacement.IsImmediate()) {
         Use(replacement);
     }
-    args[0] = replacement;
+    if (op == Opcode::Phi) {
+        phi_args[0].second = replacement;
+    } else {
+        args[0] = replacement;
+    }
 }
 
 void Inst::Use(const Value& value) {
-    ++value.Inst()->use_count;
+    Inst* const inst{value.Inst()};
+    ++inst->use_count;
 
     switch (op) {
     case Opcode::GetZeroFromOp:
-        SetPseudoInstruction(value.Inst()->zero_inst, this);
+        SetPseudoInstruction(inst->zero_inst, this);
         break;
     case Opcode::GetSignFromOp:
-        SetPseudoInstruction(value.Inst()->sign_inst, this);
+        SetPseudoInstruction(inst->sign_inst, this);
         break;
     case Opcode::GetCarryFromOp:
-        SetPseudoInstruction(value.Inst()->carry_inst, this);
+        SetPseudoInstruction(inst->carry_inst, this);
         break;
     case Opcode::GetOverflowFromOp:
-        SetPseudoInstruction(value.Inst()->overflow_inst, this);
+        SetPseudoInstruction(inst->overflow_inst, this);
         break;
     default:
         break;
@@ -192,20 +237,21 @@ void Inst::Use(const Value& value) {
 }
 
 void Inst::UndoUse(const Value& value) {
-    --value.Inst()->use_count;
+    Inst* const inst{value.Inst()};
+    --inst->use_count;
 
     switch (op) {
     case Opcode::GetZeroFromOp:
-        RemovePseudoInstruction(value.Inst()->zero_inst, Opcode::GetZeroFromOp);
+        RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp);
         break;
     case Opcode::GetSignFromOp:
-        RemovePseudoInstruction(value.Inst()->sign_inst, Opcode::GetSignFromOp);
+        RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp);
         break;
     case Opcode::GetCarryFromOp:
-        RemovePseudoInstruction(value.Inst()->carry_inst, Opcode::GetCarryFromOp);
+        RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp);
         break;
     case Opcode::GetOverflowFromOp:
-        RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp);
+        RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp);
         break;
     default:
         break;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 80baffb2e8..ddf0f90a9e 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -6,8 +6,8 @@
 
 #include <array>
 #include <cstring>
-#include <span>
 #include <type_traits>
+#include <utility>
 #include <vector>
 
 #include <boost/intrusive/list.hpp>
@@ -25,7 +25,14 @@ constexpr size_t MAX_ARG_COUNT = 4;
 
 class Inst : public boost::intrusive::list_base_hook<> {
 public:
-    explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {}
+    explicit Inst(Opcode op_, u64 flags_) noexcept;
+    ~Inst();
+
+    Inst& operator=(const Inst&) = delete;
+    Inst(const Inst&) = delete;
+
+    Inst& operator=(Inst&&) = delete;
+    Inst(Inst&&) = delete;
 
     /// Get the number of uses this instruction has.
     [[nodiscard]] int UseCount() const noexcept {
@@ -50,26 +57,26 @@ public:
     [[nodiscard]] bool IsPseudoInstruction() const noexcept;
 
     /// Determines if all arguments of this instruction are immediates.
-    [[nodiscard]] bool AreAllArgsImmediates() const noexcept;
+    [[nodiscard]] bool AreAllArgsImmediates() const;
 
     /// Determines if there is a pseudo-operation associated with this instruction.
     [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
     /// Gets a pseudo-operation associated with this instruction
     [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
 
-    /// Get the number of arguments this instruction has.
-    [[nodiscard]] size_t NumArgs() const;
-
     /// Get the type this instruction returns.
     [[nodiscard]] IR::Type Type() const;
 
+    /// Get the number of arguments this instruction has.
+    [[nodiscard]] size_t NumArgs() const;
+
     /// Get the value of a given argument index.
     [[nodiscard]] Value Arg(size_t index) const;
     /// Set the value of a given argument index.
     void SetArg(size_t index, Value value);
 
-    /// Get an immutable span to the phi operands.
-    [[nodiscard]] std::span<const std::pair<Block*, Value>> PhiOperands() const noexcept;
+    /// Get a pointer to the block of a phi argument.
+    [[nodiscard]] Block* PhiBlock(size_t index) const;
     /// Add phi operand to a phi instruction.
     void AddPhiOperand(Block* predecessor, const Value& value);
 
@@ -87,18 +94,26 @@ public:
     }
 
 private:
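+    // Dummy type with a no-op user-provided constructor. Used as the union's
+    // default member so that constructing an Inst does not initialize args or
+    // phi_args; Inst's constructor and destructor manage the active member.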
+    struct NonTriviallyDummy {
+        NonTriviallyDummy() noexcept {}
+    };
+
     void Use(const Value& value);
     void UndoUse(const Value& value);
 
     IR::Opcode op{};
     int use_count{};
-    std::array<Value, MAX_ARG_COUNT> args{};
+    u64 flags{};
+    union {
+        NonTriviallyDummy dummy{};
+        std::array<Value, MAX_ARG_COUNT> args;
+        std::vector<std::pair<Block*, Value>> phi_args;
+    };
     Inst* zero_inst{};
     Inst* sign_inst{};
     Inst* carry_inst{};
     Inst* overflow_inst{};
-    std::vector<std::pair<Block*, Value>> phi_operands;
-    u64 flags{};
 };
+static_assert(sizeof(Inst) <= 128, "Inst unintentionally increased its size");
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 6eb105d929..82b04f37c6 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -3,9 +3,9 @@
 // Refer to the license.txt file included.
 
 //     opcode name,                                         return type,    arg1 type,      arg2 type,      arg3 type,      arg4 type,      ...
+OPCODE(Phi,                                                 Opaque,                                                                         )
 OPCODE(Void,                                                Void,                                                                           )
 OPCODE(Identity,                                            Opaque,         Opaque,                                                         )
-OPCODE(Phi,                                                 Opaque,         /*todo*/                                                        )
 
 // Control flow
 OPCODE(Branch,                                              Void,           Label,                                                          )
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index 7713e3ba9f..15a9db90a9 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -104,32 +104,34 @@ private:
             val = ReadVariable(variable, preds.front());
         } else {
             // Break potential cycles with operandless phi
-            val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+            IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+            val = IR::Value{&phi_inst};
             WriteVariable(variable, block, val);
-            val = AddPhiOperands(variable, val, block);
+            val = AddPhiOperands(variable, phi_inst, block);
         }
         WriteVariable(variable, block, val);
         return val;
     }
 
-    IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) {
+    IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) {
         for (IR::Block* const pred : block->ImmediatePredecessors()) {
-            phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred));
+            phi.AddPhiOperand(pred, ReadVariable(variable, pred));
         }
         return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
     }
 
-    IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) {
+    IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
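+        // A phi is trivial when every operand is either one same value or the
+        // phi itself (cf. Braun et al., "Simple and Efficient Construction of
+        // Static Single Assignment Form", whose algorithm this pass follows).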
         IR::Value same;
-        for (const auto& pair : phi.Inst()->PhiOperands()) {
-            const IR::Value& op{pair.second};
-            if (op == same || op == phi) {
+        const size_t num_args{phi.NumArgs()};
+        for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
+            const IR::Value& op{phi.Arg(arg_index)};
+            if (op == same || op == IR::Value{&phi}) {
                 // Unique value or self-reference
                 continue;
             }
             if (!same.IsEmpty()) {
                 // The phi merges at least two values: not trivial
-                return phi;
+                return IR::Value{&phi};
             }
             same = op;
         }
@@ -139,7 +141,7 @@ private:
             same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)};
         }
         // Reroute all uses of phi to same and remove phi
-        phi.Inst()->ReplaceUsesWith(same);
+        phi.ReplaceUsesWith(same);
         // TODO: Try to recursively remove all phi users, which might have become trivial
         return same;
     }
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
index 7c65bbd921..a573add327 100644
--- a/src/shader_recompiler/object_pool.h
+++ b/src/shader_recompiler/object_pool.h
@@ -6,6 +6,7 @@
 
 #include <memory>
 #include <type_traits>
+#include <utility>
 
 namespace Shader {
 
@@ -31,14 +32,12 @@ public:
 
     void ReleaseContents() {
         Chunk* chunk{&root};
-        if (chunk) {
-            const size_t free_objects{chunk->free_objects};
-            if (free_objects == chunk_size) {
+        while (chunk) {
+            if (chunk->free_objects == chunk_size) {
                 break;
             }
-            chunk->free_objects = chunk_size;
-            for (size_t obj_id = free_objects; obj_id < chunk_size; ++obj_id) {
-                chunk->storage[obj_id].object.~T();
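+            // Destroy every live object in this chunk; free_objects doubles as
+            // the destruction cursor and ends at chunk_size, leaving the chunk
+            // marked as fully free.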
+            for (; chunk->free_objects < chunk_size; ++chunk->free_objects) {
+                chunk->storage[chunk->free_objects].object.~T();
             }
             chunk = chunk->next.get();
         }
-- 
cgit v1.2.3-70-g09d2


From 9170200a11715d131645d1ffb92e86e6ef0d7e88 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 11 Feb 2021 16:39:06 -0300
Subject: shader: Initial implementation of an AST

---
 externals/sirit                                    |   2 +-
 src/shader_recompiler/CMakeLists.txt               |   4 +-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  45 +-
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  18 +-
 .../backend/spirv/emit_spirv_context_get_set.cpp   |   8 +
 .../backend/spirv/emit_spirv_control_flow.cpp      |  25 -
 .../backend/spirv/emit_spirv_undefined.cpp         |  12 +-
 src/shader_recompiler/frontend/ir/basic_block.cpp  |  64 +-
 src/shader_recompiler/frontend/ir/basic_block.h    |  40 +-
 src/shader_recompiler/frontend/ir/condition.cpp    |  14 +-
 src/shader_recompiler/frontend/ir/condition.h      |   2 +-
 src/shader_recompiler/frontend/ir/function.h       |   2 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  43 +-
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  23 +-
 .../frontend/ir/microinstruction.cpp               |   4 +-
 src/shader_recompiler/frontend/ir/opcodes.inc      |  16 +-
 .../frontend/ir/structured_control_flow.cpp        | 742 +++++++++++++++++++++
 .../frontend/ir/structured_control_flow.h          |  22 +
 .../frontend/maxwell/control_flow.cpp              | 426 +++++-------
 .../frontend/maxwell/control_flow.h                |  77 +--
 src/shader_recompiler/frontend/maxwell/location.h  |  12 +-
 src/shader_recompiler/frontend/maxwell/program.cpp |  69 +-
 src/shader_recompiler/frontend/maxwell/program.h   |   2 +-
 .../frontend/maxwell/termination_code.cpp          |  86 ---
 .../frontend/maxwell/termination_code.h            |  17 -
 .../maxwell/translate/impl/integer_shift_left.cpp  |   2 +-
 .../frontend/maxwell/translate/translate.cpp       |  17 +-
 .../frontend/maxwell/translate/translate.h         |   7 +-
 .../ir_opt/constant_propagation_pass.cpp           |  50 ++
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |  24 +-
 src/shader_recompiler/ir_opt/verification_pass.cpp |   4 +
 src/shader_recompiler/main.cpp                     |  29 +-
 src/shader_recompiler/shader_info.h                |  28 +
 33 files changed, 1346 insertions(+), 590 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.cpp
 create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.h
 delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp
 delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h
 create mode 100644 src/shader_recompiler/shader_info.h

diff --git a/externals/sirit b/externals/sirit
index 1f7b70730d..c374bfd9fd 160000
--- a/externals/sirit
+++ b/externals/sirit
@@ -1 +1 @@
-Subproject commit 1f7b70730d610cfbd5099ab93dd38ec8a78e7e35
+Subproject commit c374bfd9fdff02a0cff85d005488967b1b0f675e
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 12fbcb37c3..27fc79e21b 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -36,6 +36,8 @@ add_executable(shader_recompiler
     frontend/ir/program.cpp
     frontend/ir/program.h
     frontend/ir/reg.h
+    frontend/ir/structured_control_flow.cpp
+    frontend/ir/structured_control_flow.h
     frontend/ir/type.cpp
     frontend/ir/type.h
     frontend/ir/value.cpp
@@ -51,8 +53,6 @@ add_executable(shader_recompiler
     frontend/maxwell/opcodes.h
     frontend/maxwell/program.cpp
     frontend/maxwell/program.h
-    frontend/maxwell/termination_code.cpp
-    frontend/maxwell/termination_code.h
     frontend/maxwell/translate/impl/common_encoding.h
     frontend/maxwell/translate/impl/floating_point_add.cpp
     frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 7c4269fad4..5022b51597 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -105,8 +105,26 @@ void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) {
     throw LogicError("Invalid opcode {}", inst->Opcode());
 }
 
-void EmitSPIRV::EmitPhi(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+static Id TypeId(const EmitContext& ctx, IR::Type type) {
+    switch (type) {
+    case IR::Type::U1:
+        return ctx.u1;
+    default:
+        throw NotImplementedException("Phi node type {}", type);
+    }
+}
+
+Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) {
+    const size_t num_args{inst->NumArgs()};
+    boost::container::small_vector<Id, 64> operands;
+    operands.reserve(num_args * 2);
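+    // OpPhi takes interleaved (value, predecessor label) pairs: two ids per argument.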
+    for (size_t index = 0; index < num_args; ++index) {
+        IR::Block* const phi_block{inst->PhiBlock(index)};
+        operands.push_back(ctx.Def(inst->Arg(index)));
+        operands.push_back(ctx.BlockLabel(phi_block));
+    }
+    const Id result_type{TypeId(ctx, inst->Arg(0).Type())};
+    return ctx.OpPhi(result_type, std::span(operands.data(), operands.size()));
 }
 
 void EmitSPIRV::EmitVoid(EmitContext&) {}
@@ -115,6 +133,29 @@ void EmitSPIRV::EmitIdentity(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
+// FIXME: Move to its own file
+void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) {
+    ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label()));
+}
+
+void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) {
+    ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()),
+                            ctx.BlockLabel(inst->Arg(2).Label()));
+}
+
+void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Inst* inst) {
+    ctx.OpLoopMerge(ctx.BlockLabel(inst->Arg(0).Label()), ctx.BlockLabel(inst->Arg(1).Label()),
+                    spv::LoopControlMask::MaskNone);
+}
+
+void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst) {
+    ctx.OpSelectionMerge(ctx.BlockLabel(inst->Arg(0).Label()), spv::SelectionControlMask::MaskNone);
+}
+
+void EmitSPIRV::EmitReturn(EmitContext& ctx) {
+    ctx.OpReturn();
+}
+
 void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 3f4b68a7d1..9aa83b5de4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -124,18 +124,20 @@ private:
     void EmitInst(EmitContext& ctx, IR::Inst* inst);
 
     // Microinstruction emitters
-    void EmitPhi(EmitContext& ctx);
+    Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
     void EmitVoid(EmitContext& ctx);
     void EmitIdentity(EmitContext& ctx);
     void EmitBranch(EmitContext& ctx, IR::Inst* inst);
     void EmitBranchConditional(EmitContext& ctx, IR::Inst* inst);
-    void EmitExit(EmitContext& ctx);
+    void EmitLoopMerge(EmitContext& ctx, IR::Inst* inst);
+    void EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst);
     void EmitReturn(EmitContext& ctx);
-    void EmitUnreachable(EmitContext& ctx);
     void EmitGetRegister(EmitContext& ctx);
     void EmitSetRegister(EmitContext& ctx);
     void EmitGetPred(EmitContext& ctx);
     void EmitSetPred(EmitContext& ctx);
+    void EmitSetGotoVariable(EmitContext& ctx);
+    void EmitGetGotoVariable(EmitContext& ctx);
     Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
     void EmitGetAttribute(EmitContext& ctx);
     void EmitSetAttribute(EmitContext& ctx);
@@ -151,11 +153,11 @@ private:
     void EmitSetOFlag(EmitContext& ctx);
     Id EmitWorkgroupId(EmitContext& ctx);
     Id EmitLocalInvocationId(EmitContext& ctx);
-    void EmitUndef1(EmitContext& ctx);
-    void EmitUndef8(EmitContext& ctx);
-    void EmitUndef16(EmitContext& ctx);
-    void EmitUndef32(EmitContext& ctx);
-    void EmitUndef64(EmitContext& ctx);
+    Id EmitUndefU1(EmitContext& ctx);
+    void EmitUndefU8(EmitContext& ctx);
+    void EmitUndefU16(EmitContext& ctx);
+    void EmitUndefU32(EmitContext& ctx);
+    void EmitUndefU64(EmitContext& ctx);
     void EmitLoadGlobalU8(EmitContext& ctx);
     void EmitLoadGlobalS8(EmitContext& ctx);
     void EmitLoadGlobalU16(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index b121305ea9..1eab739edb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -22,6 +22,14 @@ void EmitSPIRV::EmitSetPred(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
+void EmitSPIRV::EmitSetGotoVariable(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitGetGotoVariable(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
 Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (!binding.IsImmediate()) {
         throw NotImplementedException("Constant buffer indexing");
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 770fe113c8..66ce6c8c54 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -3,28 +3,3 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
-
-namespace Shader::Backend::SPIRV {
-
-void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) {
-    ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label()));
-}
-
-void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) {
-    ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()),
-                            ctx.BlockLabel(inst->Arg(2).Label()));
-}
-
-void EmitSPIRV::EmitExit(EmitContext& ctx) {
-    ctx.OpReturn();
-}
-
-void EmitSPIRV::EmitReturn(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitSPIRV::EmitUnreachable(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
index 3850b072ce..859b60a95d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
@@ -6,23 +6,23 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitSPIRV::EmitUndef1(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) {
+    return ctx.OpUndef(ctx.u1);
 }
 
-void EmitSPIRV::EmitUndef8(EmitContext&) {
+void EmitSPIRV::EmitUndefU8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitUndef16(EmitContext&) {
+void EmitSPIRV::EmitUndefU16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitUndef32(EmitContext&) {
+void EmitSPIRV::EmitUndefU32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitUndef64(EmitContext&) {
+void EmitSPIRV::EmitUndefU64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index da33ff6f10..b5616f3941 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -17,6 +17,8 @@ namespace Shader::IR {
 Block::Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end)
     : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {}
 
+Block::Block(ObjectPool<Inst>& inst_pool_) : Block{inst_pool_, 0, 0} {}
+
 Block::~Block() = default;
 
 void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
@@ -38,8 +40,25 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
     return result_it;
 }
 
-void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) {
-    imm_predecessors.push_back(immediate_predecessor);
+void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) {
+    branch_cond = cond;
+    branch_true = branch_true_;
+    branch_false = branch_false_;
+}
+
+void Block::SetBranch(Block* branch) {
+    branch_cond = Condition{true};
+    branch_true = branch;
+}
+
+void Block::SetReturn() {
+    branch_cond = Condition{true};
+    branch_true = nullptr;
+    branch_false = nullptr;
+}
+
+bool Block::IsVirtual() const noexcept {
+    return location_begin == location_end;
 }
 
 u32 Block::LocationBegin() const noexcept {
@@ -58,6 +77,12 @@ const Block::InstructionList& Block::Instructions() const noexcept {
     return instructions;
 }
 
+void Block::AddImmediatePredecessor(Block* block) {
+    if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) {
+        imm_predecessors.push_back(block);
+    }
+}
+
 std::span<IR::Block* const> Block::ImmediatePredecessors() const noexcept {
     return imm_predecessors;
 }
@@ -70,8 +95,17 @@ static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_i
     return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
 }
 
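+// Lazily assigns a printing index to an instruction the first time it is
+// referenced, so forward references (e.g. phi arguments) get stable names.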
+static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
+                        const Inst* inst) {
+    const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)};
+    if (is_inserted) {
+        ++inst_index;
+    }
+    return it->second;
+}
+
 static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index,
-                              const std::map<const Inst*, size_t>& inst_to_index,
+                              std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
                               const Value& arg) {
     if (arg.IsEmpty()) {
         return "<null>";
@@ -80,10 +114,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind
         return BlockToIndex(block_to_index, arg.Label());
     }
     if (!arg.IsImmediate()) {
-        if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) {
-            return fmt::format("%{}", it->second);
-        }
-        return fmt::format("%<unknown inst {:016x}>", reinterpret_cast<u64>(arg.Inst()));
+        return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
     }
     switch (arg.Type()) {
     case Type::U1:
@@ -125,14 +156,14 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
         const Opcode op{inst.Opcode()};
         ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
         if (TypeOf(op) != Type::Void) {
-            ret += fmt::format("%{:<5} = {}", inst_index, op);
+            ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
         } else {
             ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
         }
-        const size_t arg_count{NumArgsOf(op)};
+        const size_t arg_count{inst.NumArgs()};
         for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
             const Value arg{inst.Arg(arg_index)};
-            const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)};
+            const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)};
             ret += arg_index != 0 ? ", " : " ";
             if (op == Opcode::Phi) {
                 ret += fmt::format("[ {}, {} ]", arg_index,
@@ -140,10 +171,12 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
             } else {
                 ret += arg_str;
             }
-            const Type actual_type{arg.Type()};
-            const Type expected_type{ArgTypeOf(op, arg_index)};
-            if (!AreTypesCompatible(actual_type, expected_type)) {
-                ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
+            if (op != Opcode::Phi) {
+                const Type actual_type{arg.Type()};
+                const Type expected_type{ArgTypeOf(op, arg_index)};
+                if (!AreTypesCompatible(actual_type, expected_type)) {
+                    ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
+                }
             }
         }
         if (TypeOf(op) != Type::Void) {
@@ -151,9 +184,6 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
         } else {
             ret += '\n';
         }
-
-        inst_to_index.emplace(&inst, inst_index);
-        ++inst_index;
     }
     return ret;
 }
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index ec3ad62634..3205705e79 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -11,7 +11,9 @@
 
 #include <boost/intrusive/list.hpp>
 
+#include "shader_recompiler/frontend/ir/condition.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/object_pool.h"
 
 namespace Shader::IR {
@@ -26,6 +28,7 @@ public:
     using const_reverse_iterator = InstructionList::const_reverse_iterator;
 
     explicit Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end);
+    explicit Block(ObjectPool<Inst>& inst_pool_);
     ~Block();
 
     Block(const Block&) = delete;
@@ -41,9 +44,15 @@ public:
     iterator PrependNewInst(iterator insertion_point, Opcode op,
                             std::initializer_list<Value> args = {}, u64 flags = 0);
 
-    /// Adds a new immediate predecessor to the basic block.
-    void AddImmediatePredecessor(IR::Block* immediate_predecessor);
+    /// Set the branches to jump to when all instructions have executed.
+    void SetBranches(Condition cond, Block* branch_true, Block* branch_false);
+    /// Set the branch to unconditionally jump to when all instructions have executed.
+    void SetBranch(Block* branch);
+    /// Mark the block as a return block.
+    void SetReturn();
 
+    /// Returns true when the block does not implement any guest instructions directly.
+    [[nodiscard]] bool IsVirtual() const noexcept;
     /// Gets the starting location of this basic block.
     [[nodiscard]] u32 LocationBegin() const noexcept;
     /// Gets the end location for this basic block.
@@ -54,8 +63,23 @@ public:
     /// Gets an immutable reference to the instruction list for this basic block.
     [[nodiscard]] const InstructionList& Instructions() const noexcept;
 
+    /// Adds a new immediate predecessor to this basic block.
+    void AddImmediatePredecessor(Block* block);
     /// Gets an immutable span to the immediate predecessors.
-    [[nodiscard]] std::span<IR::Block* const> ImmediatePredecessors() const noexcept;
+    [[nodiscard]] std::span<Block* const> ImmediatePredecessors() const noexcept;
+
+    [[nodiscard]] Condition BranchCondition() const noexcept {
+        return branch_cond;
+    }
+    [[nodiscard]] bool IsTerminationBlock() const noexcept {
+        return !branch_true && !branch_false;
+    }
+    [[nodiscard]] Block* TrueBranch() const noexcept {
+        return branch_true;
+    }
+    [[nodiscard]] Block* FalseBranch() const noexcept {
+        return branch_false;
+    }
 
     [[nodiscard]] bool empty() const {
         return instructions.empty();
@@ -129,10 +153,18 @@ private:
     /// List of instructions in this block
     InstructionList instructions;
 
+    /// Condition to choose the branch to take
+    Condition branch_cond{true};
+    /// Block to jump into when the branch condition evaluates as true
+    Block* branch_true{nullptr};
+    /// Block to jump into when the branch condition evaluates as false
+    Block* branch_false{nullptr};
     /// Block immediate predecessors
-    std::vector<IR::Block*> imm_predecessors;
+    std::vector<Block*> imm_predecessors;
 };
 
+using BlockList = std::vector<Block*>;
+
 [[nodiscard]] std::string DumpBlock(const Block& block);
 
 [[nodiscard]] std::string DumpBlock(const Block& block,
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
index edff35dc77..ec1659e2bc 100644
--- a/src/shader_recompiler/frontend/ir/condition.cpp
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -16,15 +16,13 @@ std::string NameOf(Condition condition) {
         ret = fmt::to_string(condition.FlowTest());
     }
     const auto [pred, negated]{condition.Pred()};
-    if (pred != Pred::PT || negated) {
-        if (!ret.empty()) {
-            ret += '&';
-        }
-        if (negated) {
-            ret += '!';
-        }
-        ret += fmt::to_string(pred);
+    if (!ret.empty()) {
+        ret += '&';
     }
+    if (negated) {
+        ret += '!';
+    }
+    ret += fmt::to_string(pred);
     return ret;
 }
 
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
index 52737025c9..16b4ae888e 100644
--- a/src/shader_recompiler/frontend/ir/condition.h
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -26,7 +26,7 @@ public:
     explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept
         : Condition(FlowTest::T, pred_, pred_negated_) {}
 
-    Condition(bool value) : Condition(Pred::PT, !value) {}
+    explicit Condition(bool value) : Condition(Pred::PT, !value) {}
 
     auto operator<=>(const Condition&) const noexcept = default;
 
diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h
index bba7d1d395..fd7d564191 100644
--- a/src/shader_recompiler/frontend/ir/function.h
+++ b/src/shader_recompiler/frontend/ir/function.h
@@ -11,7 +11,7 @@
 namespace Shader::IR {
 
 struct Function {
-    boost::container::small_vector<Block*, 16> blocks;
+    BlockList blocks;
 };
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index ada0be8343..30932043f9 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -44,24 +44,27 @@ F64 IREmitter::Imm64(f64 value) const {
     return F64{Value{value}};
 }
 
-void IREmitter::Branch(IR::Block* label) {
+void IREmitter::Branch(Block* label) {
+    label->AddImmediatePredecessor(block);
     Inst(Opcode::Branch, label);
 }
 
-void IREmitter::BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label) {
-    Inst(Opcode::BranchConditional, cond, true_label, false_label);
+void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) {
+    true_label->AddImmediatePredecessor(block);
+    false_label->AddImmediatePredecessor(block);
+    Inst(Opcode::BranchConditional, condition, true_label, false_label);
 }
 
-void IREmitter::Exit() {
-    Inst(Opcode::Exit);
+void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) {
+    Inst(Opcode::LoopMerge, merge_block, continue_target);
 }
 
-void IREmitter::Return() {
-    Inst(Opcode::Return);
+void IREmitter::SelectionMerge(Block* merge_block) {
+    Inst(Opcode::SelectionMerge, merge_block);
 }
 
-void IREmitter::Unreachable() {
-    Inst(Opcode::Unreachable);
+void IREmitter::Return() {
+    Inst(Opcode::Return);
 }
 
 U32 IREmitter::GetReg(IR::Reg reg) {
@@ -81,6 +84,14 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) {
     }
 }
 
+U1 IREmitter::GetGotoVariable(u32 id) {
+    return Inst<U1>(Opcode::GetGotoVariable, id);
+}
+
+void IREmitter::SetGotoVariable(u32 id, const U1& value) {
+    Inst(Opcode::SetGotoVariable, id, value);
+}
+
 void IREmitter::SetPred(IR::Pred pred, const U1& value) {
     Inst(Opcode::SetPred, pred, value);
 }
@@ -121,6 +132,20 @@ void IREmitter::SetOFlag(const U1& value) {
     Inst(Opcode::SetOFlag, value);
 }
 
+U1 IREmitter::Condition(IR::Condition cond) {
+    if (cond == IR::Condition{true}) {
+        return Imm1(true);
+    } else if (cond == IR::Condition{false}) {
+        return Imm1(false);
+    }
+    const FlowTest flow_test{cond.FlowTest()};
+    const auto [pred, is_negated]{cond.Pred()};
+    if (flow_test == FlowTest::T) {
+        return GetPred(pred, is_negated);
+    }
+    throw NotImplementedException("Condition {}", cond);
+}
+
 F32 IREmitter::GetAttribute(IR::Attribute attribute) {
     return Inst<F32>(Opcode::GetAttribute, attribute);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index bfd9916cca..4decb46bc1 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -16,11 +16,11 @@ namespace Shader::IR {
 
 class IREmitter {
 public:
-    explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {}
+    explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
     explicit IREmitter(Block& block_, Block::iterator insertion_point_)
-        : block{block_}, insertion_point{insertion_point_} {}
+        : block{&block_}, insertion_point{insertion_point_} {}
 
-    Block& block;
+    Block* block;
 
     [[nodiscard]] U1 Imm1(bool value) const;
     [[nodiscard]] U8 Imm8(u8 value) const;
@@ -31,11 +31,11 @@ public:
     [[nodiscard]] U64 Imm64(u64 value) const;
     [[nodiscard]] F64 Imm64(f64 value) const;
 
-    void Branch(IR::Block* label);
-    void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label);
-    void Exit();
+    void Branch(Block* label);
+    void BranchConditional(const U1& condition, Block* true_label, Block* false_label);
+    void LoopMerge(Block* merge_block, Block* continue_target);
+    void SelectionMerge(Block* merge_block);
     void Return();
-    void Unreachable();
 
     [[nodiscard]] U32 GetReg(IR::Reg reg);
     void SetReg(IR::Reg reg, const U32& value);
@@ -43,6 +43,9 @@ public:
     [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false);
     void SetPred(IR::Pred pred, const U1& value);
 
+    [[nodiscard]] U1 GetGotoVariable(u32 id);
+    void SetGotoVariable(u32 id, const U1& value);
+
     [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
 
     [[nodiscard]] U1 GetZFlag();
@@ -55,6 +58,8 @@ public:
     void SetCFlag(const U1& value);
     void SetOFlag(const U1& value);
 
+    [[nodiscard]] U1 Condition(IR::Condition cond);
+
     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
     void SetAttribute(IR::Attribute attribute, const F32& value);
 
@@ -168,7 +173,7 @@ private:
 
     template <typename T = Value, typename... Args>
     T Inst(Opcode op, Args... args) {
-        auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})};
+        auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
         return T{Value{&*it}};
     }
 
@@ -184,7 +189,7 @@ private:
     T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
         u64 raw_flags{};
         std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
-        auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
+        auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
         return T{Value{&*it}};
     }
 };
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index e7ca92039e..b4ae371bd1 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -51,9 +51,9 @@ bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
     case Opcode::Branch:
     case Opcode::BranchConditional:
-    case Opcode::Exit:
+    case Opcode::LoopMerge:
+    case Opcode::SelectionMerge:
     case Opcode::Return:
-    case Opcode::Unreachable:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
     case Opcode::WriteGlobalU8:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 5dc65f2dfa..ede5e20c2d 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -10,15 +10,17 @@ OPCODE(Identity,                                            Opaque,         Opaq
 // Control flow
 OPCODE(Branch,                                              Void,           Label,                                                          )
 OPCODE(BranchConditional,                                   Void,           U1,             Label,          Label,                          )
-OPCODE(Exit,                                                Void,                                                                           )
+OPCODE(LoopMerge,                                           Void,           Label,          Label,                                          )
+OPCODE(SelectionMerge,                                      Void,           Label,                                                          )
 OPCODE(Return,                                              Void,                                                                           )
-OPCODE(Unreachable,                                         Void,                                                                           )
 
 // Context getters/setters
 OPCODE(GetRegister,                                         U32,            Reg,                                                            )
 OPCODE(SetRegister,                                         Void,           Reg,            U32,                                            )
 OPCODE(GetPred,                                             U1,             Pred,                                                           )
 OPCODE(SetPred,                                             Void,           Pred,           U1,                                             )
+OPCODE(GetGotoVariable,                                     U1,             U32,                                                            )
+OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                             )
 OPCODE(GetCbuf,                                             U32,            U32,            U32,                                            )
 OPCODE(GetAttribute,                                        U32,            Attribute,                                                      )
 OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                            )
@@ -36,11 +38,11 @@ OPCODE(WorkgroupId,                                         U32x3,
 OPCODE(LocalInvocationId,                                   U32x3,                                                                          )
 
 // Undefined
-OPCODE(Undef1,                                              U1,                                                                             )
-OPCODE(Undef8,                                              U8,                                                                             )
-OPCODE(Undef16,                                             U16,                                                                            )
-OPCODE(Undef32,                                             U32,                                                                            )
-OPCODE(Undef64,                                             U64,                                                                            )
+OPCODE(UndefU1,                                             U1,                                                                             )
+OPCODE(UndefU8,                                             U8,                                                                             )
+OPCODE(UndefU16,                                            U16,                                                                            )
+OPCODE(UndefU32,                                            U32,                                                                            )
+OPCODE(UndefU64,                                            U64,                                                                            )
 
 // Memory operations
 OPCODE(LoadGlobalU8,                                        U32,            U64,                                                            )
diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp
new file mode 100644
index 0000000000..2e9ce25256
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp
@@ -0,0 +1,742 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <ranges>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include <boost/intrusive/list.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::IR {
+namespace {
+struct Statement;
+
+// Use normal_link because we are not guaranteed to destroy the tree in order
+using ListBaseHook =
+    boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;
+
+using Tree = boost::intrusive::list<Statement,
+                                    // Allow using Statement without a definition
+                                    boost::intrusive::base_hook<ListBaseHook>,
+                                    // Avoid linear complexity on splice, size is never called
+                                    boost::intrusive::constant_time_size<false>>;
+using Node = Tree::iterator;
+using ConstNode = Tree::const_iterator;
+
+enum class StatementType {
+    Code,
+    Goto,
+    Label,
+    If,
+    Loop,
+    Break,
+    Return,
+    Function,
+    Identity,
+    Not,
+    Or,
+    SetVariable,
+    Variable,
+};
+
+bool HasChildren(StatementType type) {
+    switch (type) {
+    case StatementType::If:
+    case StatementType::Loop:
+    case StatementType::Function:
+        return true;
+    default:
+        return false;
+    }
+}
+
+struct Goto {};
+struct Label {};
+struct If {};
+struct Loop {};
+struct Break {};
+struct Return {};
+struct FunctionTag {};
+struct Identity {};
+struct Not {};
+struct Or {};
+struct SetVariable {};
+struct Variable {};
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 26495) // "Always initialize a member variable", intentionally not done in Statement
+#endif
+struct Statement : ListBaseHook {
+    Statement(Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {}
+    Statement(Goto, Statement* cond_, Node label_, Statement* up_)
+        : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
+    Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
+    Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
+        : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
+    Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
+        : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
+    Statement(Break, Statement* cond_, Statement* up_)
+        : cond{cond_}, up{up_}, type{StatementType::Break} {}
+    Statement(Return) : type{StatementType::Return} {}
+    Statement(FunctionTag) : children{}, type{StatementType::Function} {}
+    Statement(Identity, Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {}
+    Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {}
+    Statement(Or, Statement* op_a_, Statement* op_b_)
+        : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {}
+    Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
+        : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
+    Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {}
+
+    ~Statement() {
+        if (HasChildren(type)) {
+            std::destroy_at(&children);
+        }
+    }
+
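+    // Which member of each union below is active is implied by type; see the
+    // constructors above.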
+    union {
+        Block* code;
+        Node label;
+        Tree children;
+        Condition guest_cond;
+        Statement* op;
+        Statement* op_a;
+    };
+    union {
+        Statement* cond;
+        Statement* op_b;
+        u32 id;
+    };
+    Statement* up{};
+    StatementType type;
+};
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+std::string DumpExpr(const Statement* stmt) {
+    switch (stmt->type) {
+    case StatementType::Identity:
+        return fmt::format("{}", stmt->guest_cond);
+    case StatementType::Not:
+        return fmt::format("!{}", DumpExpr(stmt->op));
+    case StatementType::Or:
+        return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
+    case StatementType::Variable:
+        return fmt::format("goto_L{}", stmt->id);
+    default:
+        return "<invalid type>";
+    }
+}
+
+std::string DumpTree(const Tree& tree, u32 indentation = 0) {
+    std::string ret;
+    std::string indent(indentation, ' ');
+    for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
+        switch (stmt->type) {
+        case StatementType::Code:
+            ret += fmt::format("{}    Block {:04x};\n", indent, stmt->code->LocationBegin());
+            break;
+        case StatementType::Goto:
+            ret += fmt::format("{}    if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
+                               stmt->label->id);
+            break;
+        case StatementType::Label:
+            ret += fmt::format("{}L{}:\n", indent, stmt->id);
+            break;
+        case StatementType::If:
+            ret += fmt::format("{}    if ({}) {{\n", indent, DumpExpr(stmt->cond));
+            ret += DumpTree(stmt->children, indentation + 4);
+            ret += fmt::format("{}    }}\n", indent);
+            break;
+        case StatementType::Loop:
+            ret += fmt::format("{}    do {{\n", indent);
+            ret += DumpTree(stmt->children, indentation + 4);
+            ret += fmt::format("{}    }} while ({});\n", indent, DumpExpr(stmt->cond));
+            break;
+        case StatementType::Break:
+            ret += fmt::format("{}    if ({}) break;\n", indent, DumpExpr(stmt->cond));
+            break;
+        case StatementType::Return:
+            ret += fmt::format("{}    return;\n", indent);
+            break;
+        case StatementType::SetVariable:
+            ret += fmt::format("{}    goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
+            break;
+        case StatementType::Function:
+        case StatementType::Identity:
+        case StatementType::Not:
+        case StatementType::Or:
+        case StatementType::Variable:
+            throw LogicError("Statement can't be printed");
+        }
+    }
+    return ret;
+}
+
+bool HasNode(const Tree& tree, ConstNode stmt) {
+    const auto end{tree.end()};
+    for (auto it = tree.begin(); it != end; ++it) {
+        if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) {
+            return true;
+        }
+    }
+    return false;
+}
+
+Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) {
+    const ConstNode label_stmt{goto_stmt->label};
+    const ConstNode end{tree.end()};
+    for (auto it = tree.begin(); it != end; ++it) {
+        if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) {
+            return it;
+        }
+    }
+    throw LogicError("Lift label not in tree");
+}
+
+void SanitizeNoBreaks(const Tree& tree) {
+    if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
+        throw NotImplementedException("Capturing statement with break nodes");
+    }
+}
+
+size_t Level(Node stmt) {
+    size_t level{0};
+    Statement* node{stmt->up};
+    while (node) {
+        ++level;
+        node = node->up;
+    }
+    return level;
+}
+
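+// Two nodes are directly related when, after raising the deeper node to the
+// level of the shallower one, both end up sharing the same parent.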
+bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
+    const size_t goto_level{Level(goto_stmt)};
+    const size_t label_level{Level(label_stmt)};
+    size_t min_level;
+    size_t max_level;
+    Node min;
+    Node max;
+    if (label_level < goto_level) {
+        min_level = label_level;
+        max_level = goto_level;
+        min = label_stmt;
+        max = goto_stmt;
+    } else { // goto_level <= label_level
+        min_level = goto_level;
+        max_level = label_level;
+        min = goto_stmt;
+        max = label_stmt;
+    }
+    while (max_level > min_level) {
+        --max_level;
+        max = max->up;
+    }
+    return min->up == max->up;
+}
+
+bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
+    return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
+}
+
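+// Preorder search that also accumulates a running offset per visited node,
+// establishing the total order used to compare statement positions.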
+bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) {
+    ++offset;
+
+    const auto end = tree.end();
+    for (ConstNode it = tree.begin(); it != end; ++it) {
+        ++offset;
+        if (stmt == it) {
+            return true;
+        }
+        if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) {
+            return true;
+        }
+    }
+    return false;
+}
+
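+// Removes goto statements from the unordered tree by moving each goto until it
+// is a sibling of its label and then eliminating it as a conditional or a
+// loop, following the goto-elimination strategy of Erosa & Hendren's
+// "Taming Control Flow".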
+class GotoPass {
+public:
+    explicit GotoPass(std::span<Block* const> blocks, ObjectPool<Statement, 64>& stmt_pool)
+        : pool{stmt_pool} {
+        std::vector gotos{BuildUnorderedTreeGetGotos(blocks)};
+        fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children));
+        for (const Node& goto_stmt : gotos | std::views::reverse) {
+            RemoveGoto(goto_stmt);
+        }
+        fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children));
+    }
+
+    Statement& RootStatement() noexcept {
+        return root_stmt;
+    }
+
+private:
+    void RemoveGoto(Node goto_stmt) {
+        // Force goto_stmt and label_stmt to be directly related
+        const Node label_stmt{goto_stmt->label};
+        if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
+            // Move goto_stmt out using outward-movement transformation until it becomes
+            // directly related to label_stmt
+            while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
+                goto_stmt = MoveOutward(goto_stmt);
+            }
+        }
+        // Force goto_stmt and label_stmt to be siblings
+        if (IsDirectlyRelated(goto_stmt, label_stmt)) {
+            const size_t label_level{Level(label_stmt)};
+            size_t goto_level{Level(goto_stmt)};
+            if (goto_level > label_level) {
+                // Move goto_stmt out of its level using outward-movement transformations
+                while (goto_level > label_level) {
+                    goto_stmt = MoveOutward(goto_stmt);
+                    --goto_level;
+                }
+            } else { // Level(goto_stmt) <= Level(label_stmt)
+                if (Offset(goto_stmt) > Offset(label_stmt)) {
+                    // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
+                    // transformations
+                    goto_stmt = Lift(goto_stmt);
+                }
+                // Move goto_stmt into label_stmt's level using inward-movement transformation
+                while (goto_level < label_level) {
+                    goto_stmt = MoveInward(goto_stmt);
+                    ++goto_level;
+                }
+            }
+        }
+        // TODO: Remove this
+        Node it{goto_stmt};
+        bool sibling{false};
+        do {
+            sibling |= it == label_stmt;
+            --it;
+        } while (it != goto_stmt->up->children.begin());
+        while (it != goto_stmt->up->children.end()) {
+            sibling |= it == label_stmt;
+            ++it;
+        }
+        if (!sibling) {
+            throw LogicError("Not siblings");
+        }
+
+        // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
+        if (std::next(goto_stmt) == label_stmt) {
+            // Simply eliminate the goto if the label is next to it
+            goto_stmt->up->children.erase(goto_stmt);
+        } else if (Offset(goto_stmt) < Offset(label_stmt)) {
+            // Eliminate goto_stmt with a conditional
+            EliminateAsConditional(goto_stmt, label_stmt);
+        } else {
+            // Eliminate goto_stmt with a loop
+            EliminateAsLoop(goto_stmt, label_stmt);
+        }
+    }
+
+    std::vector<Node> BuildUnorderedTreeGetGotos(std::span<Block* const> blocks) {
+        // Assume all blocks have two branches
+        std::vector<Node> gotos;
+        gotos.reserve(blocks.size() * 2);
+
+        const std::unordered_map labels_map{BuildLabels(blocks)};
+        Tree& root{root_stmt.children};
+        auto insert_point{root.begin()};
+        for (Block* const block : blocks) {
+            ++insert_point; // Skip label
+            ++insert_point; // Skip set variable
+            root.insert(insert_point, *pool.Create(block, &root_stmt));
+
+            if (block->IsTerminationBlock()) {
+                root.insert(insert_point, *pool.Create(Return{}));
+                continue;
+            }
+            const Condition cond{block->BranchCondition()};
+            Statement* const true_cond{pool.Create(Identity{}, Condition{true})};
+            if (cond == Condition{true} || cond == Condition{false}) {
+                const bool is_true{cond == Condition{true}};
+                const Block* const branch{is_true ? block->TrueBranch() : block->FalseBranch()};
+                const Node label{labels_map.at(branch)};
+                Statement* const goto_stmt{pool.Create(Goto{}, true_cond, label, &root_stmt)};
+                gotos.push_back(root.insert(insert_point, *goto_stmt));
+            } else {
+                Statement* const ident_cond{pool.Create(Identity{}, cond)};
+                const Node true_label{labels_map.at(block->TrueBranch())};
+                const Node false_label{labels_map.at(block->FalseBranch())};
+                Statement* goto_true{pool.Create(Goto{}, ident_cond, true_label, &root_stmt)};
+                Statement* goto_false{pool.Create(Goto{}, true_cond, false_label, &root_stmt)};
+                gotos.push_back(root.insert(insert_point, *goto_true));
+                gotos.push_back(root.insert(insert_point, *goto_false));
+            }
+        }
+        return gotos;
+    }
+
+    std::unordered_map<const Block*, Node> BuildLabels(std::span<Block* const> blocks) {
+        // TODO: Consider storing labels intrusively inside the block
+        std::unordered_map<const Block*, Node> labels_map;
+        Tree& root{root_stmt.children};
+        u32 label_id{0};
+        for (const Block* const block : blocks) {
+            Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
+            labels_map.emplace(block, root.insert(root.end(), *label));
+            Statement* const false_stmt{pool.Create(Identity{}, Condition{false})};
+            root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt));
+            ++label_id;
+        }
+        return labels_map;
+    }
+
+    void UpdateTreeUp(Statement* tree) {
+        for (Statement& stmt : tree->children) {
+            stmt.up = tree;
+        }
+    }
+
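+    // Eliminates a forward goto by wrapping the statements between the goto
+    // and its label in an if taken when the goto condition does not hold.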
+    void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
+        Tree& body{goto_stmt->up->children};
+        Tree if_body;
+        if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
+        Statement* const cond{pool.Create(Not{}, goto_stmt->cond)};
+        Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
+        UpdateTreeUp(if_stmt);
+        body.insert(goto_stmt, *if_stmt);
+        body.erase(goto_stmt);
+    }
+
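+    // Eliminates a backward goto by wrapping the statements from the label up
+    // to the goto in a do-while loop repeated while the goto condition holds.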
+    void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
+        Tree& body{goto_stmt->up->children};
+        Tree loop_body;
+        loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
+        Statement* const cond{goto_stmt->cond};
+        Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
+        UpdateTreeUp(loop);
+        body.insert(goto_stmt, *loop);
+        body.erase(goto_stmt);
+    }
+
+    [[nodiscard]] Node MoveOutward(Node goto_stmt) {
+        switch (goto_stmt->up->type) {
+        case StatementType::If:
+            return MoveOutwardIf(goto_stmt);
+        case StatementType::Loop:
+            return MoveOutwardLoop(goto_stmt);
+        default:
+            throw LogicError("Invalid outward movement");
+        }
+    }
+
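+    // Moves a goto one level inward: its condition is captured in the goto
+    // variable, the statements before the label's enclosing statement are
+    // skipped while the variable is set, and a new goto guarded by the
+    // variable is inserted at the start of the enclosing statement.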
+    [[nodiscard]] Node MoveInward(Node goto_stmt) {
+        Statement* const parent{goto_stmt->up};
+        Tree& body{parent->children};
+        const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)};
+        const Node label{goto_stmt->label};
+        const u32 label_id{label->id};
+
+        Statement* const goto_cond{goto_stmt->cond};
+        Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
+        body.insert(goto_stmt, *set_var);
+
+        Tree if_body;
+        if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
+        Statement* const variable{pool.Create(Variable{}, label_id)};
+        Statement* const neg_var{pool.Create(Not{}, variable)};
+        if (!if_body.empty()) {
+            Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
+            UpdateTreeUp(if_stmt);
+            body.insert(goto_stmt, *if_stmt);
+        }
+        body.erase(goto_stmt);
+
+        // Update nested if condition
+        switch (label_nested_stmt->type) {
+        case StatementType::If:
+            label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond);
+            break;
+        case StatementType::Loop:
+            break;
+        default:
+            throw LogicError("Invalid inward movement");
+        }
+        Tree& nested_tree{label_nested_stmt->children};
+        Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
+        return nested_tree.insert(nested_tree.begin(), *new_goto);
+    }
+
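+    // Lifts a backward goto into a loop: the statements from the label up to
+    // the goto become a loop body that repeats while the goto variable is
+    // set, and a new goto is emitted at the head of that body.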
+    [[nodiscard]] Node Lift(Node goto_stmt) {
+        Statement* const parent{goto_stmt->up};
+        Tree& body{parent->children};
+        const Node label{goto_stmt->label};
+        const u32 label_id{label->id};
+        const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)};
+
+        Tree loop_body;
+        loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
+        SanitizeNoBreaks(loop_body);
+        Statement* const variable{pool.Create(Variable{}, label_id)};
+        Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
+        UpdateTreeUp(loop_stmt);
+        const Node loop_node{body.insert(goto_stmt, *loop_stmt)};
+
+        Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
+        loop_stmt->children.push_front(*new_goto);
+        const Node new_goto_node{loop_stmt->children.begin()};
+
+        Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
+        loop_stmt->children.push_back(*set_var);
+
+        body.erase(goto_stmt);
+        return new_goto_node;
+    }
+
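+    // Moves a goto out of an if statement: the condition is saved in the goto
+    // variable, the rest of the if body is guarded with if (!v), and the goto
+    // is re-emitted right after the enclosing if.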
+    Node MoveOutwardIf(Node goto_stmt) {
+        const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
+        Tree& body{parent->children};
+        const u32 label_id{goto_stmt->label->id};
+        Statement* const goto_cond{goto_stmt->cond};
+        Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
+        body.insert(goto_stmt, *set_goto_var);
+
+        Tree if_body;
+        if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
+        Statement* const cond{pool.Create(Variable{}, label_id)};
+        Statement* const neg_cond{pool.Create(Not{}, cond)};
+        Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
+        UpdateTreeUp(if_stmt);
+        body.insert(goto_stmt, *if_stmt);
+
+        body.erase(goto_stmt);
+
+        Statement* const new_cond{pool.Create(Variable{}, label_id)};
+        Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
+        Tree& parent_tree{parent->up->children};
+        return parent_tree.insert(std::next(parent), *new_goto);
+    }
+
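+    // Moves a goto out of a loop: the condition is saved in the goto
+    // variable, a conditional break leaves the loop, and the goto is
+    // re-emitted right after the loop.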
+    Node MoveOutwardLoop(Node goto_stmt) {
+        Statement* const parent{goto_stmt->up};
+        Tree& body{parent->children};
+        const u32 label_id{goto_stmt->label->id};
+        Statement* const goto_cond{goto_stmt->cond};
+        Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
+        Statement* const cond{pool.Create(Variable{}, label_id)};
+        Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
+        body.insert(goto_stmt, *set_goto_var);
+        body.insert(goto_stmt, *break_stmt);
+        body.erase(goto_stmt);
+
+        const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
+        Statement* const new_goto_cond{pool.Create(Variable{}, label_id)};
+        Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
+        Tree& parent_tree{loop->up->children};
+        return parent_tree.insert(std::next(loop), *new_goto);
+    }
+
+    size_t Offset(ConstNode stmt) const {
+        size_t offset{0};
+        if (!SearchNode(root_stmt.children, stmt, offset)) {
+            fmt::print(stdout, "{}\n", DumpTree(root_stmt.children));
+            throw LogicError("Node not found in tree");
+        }
+        return offset;
+    }
+
+    ObjectPool<Statement, 64>& pool;
+    Statement root_stmt{FunctionTag{}};
+};
+
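+// Returns the first code block found by walking forward through childless
+// statements, or nullptr when none exists before a structured statement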
+Block* TryFindForwardBlock(const Statement& stmt) {
+    const Tree& tree{stmt.up->children};
+    const ConstNode end{tree.cend()};
+    ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))};
+    while (forward_node != end && !HasChildren(forward_node->type)) {
+        if (forward_node->type == StatementType::Code) {
+            return forward_node->code;
+        }
+        ++forward_node;
+    }
+    return nullptr;
+}
+
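+// Lowers a condition statement (identities, negations, disjunctions and goto
+// variables) into an IR boolean (U1) value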
+[[nodiscard]] U1 VisitExpr(IREmitter& ir, const Statement& stmt) {
+    switch (stmt.type) {
+    case StatementType::Identity:
+        return ir.Condition(stmt.guest_cond);
+    case StatementType::Not:
+        return ir.LogicalNot(U1{VisitExpr(ir, *stmt.op)});
+    case StatementType::Or:
+        return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
+    case StatementType::Variable:
+        return ir.GetGotoVariable(stmt.id);
+    default:
+        throw NotImplementedException("Statement type {}", stmt.type);
+    }
+}
+
+class TranslatePass {
+public:
+    TranslatePass(ObjectPool<Inst>& inst_pool_, ObjectPool<Block>& block_pool_,
+                  ObjectPool<Statement, 64>& stmt_pool_, Statement& root_stmt,
+                  const std::function<void(IR::Block*)>& func_, BlockList& block_list_)
+        : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_},
+          block_list{block_list_} {
+        Visit(root_stmt, nullptr, nullptr);
+    }
+
+private:
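+    // Lowers a statement tree into IR basic blocks: code statements are
+    // emitted in order, structured statements build their own header and
+    // merge blocks, and control falls through to continue_block at the end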
+    void Visit(Statement& parent, Block* continue_block, Block* break_block) {
+        Tree& tree{parent.children};
+        Block* current_block{nullptr};
+
+        for (auto it = tree.begin(); it != tree.end(); ++it) {
+            Statement& stmt{*it};
+            switch (stmt.type) {
+            case StatementType::Label:
+                // Labels can be ignored
+                break;
+            case StatementType::Code: {
+                if (current_block && current_block != stmt.code) {
+                    IREmitter ir{*current_block};
+                    ir.Branch(stmt.code);
+                }
+                current_block = stmt.code;
+                func(stmt.code);
+                block_list.push_back(stmt.code);
+                break;
+            }
+            case StatementType::SetVariable: {
+                if (!current_block) {
+                    current_block = MergeBlock(parent, stmt);
+                }
+                IREmitter ir{*current_block};
+                ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
+                break;
+            }
+            case StatementType::If: {
+                if (!current_block) {
+                    current_block = block_pool.Create(inst_pool);
+                    block_list.push_back(current_block);
+                }
+                Block* const merge_block{MergeBlock(parent, stmt)};
+
+                // Visit children
+                const size_t first_block_index{block_list.size()};
+                Visit(stmt, merge_block, break_block);
+
+                // Implement if header block
+                Block* const first_if_block{block_list.at(first_block_index)};
+                IREmitter ir{*current_block};
+                const U1 cond{VisitExpr(ir, *stmt.cond)};
+                ir.SelectionMerge(merge_block);
+                ir.BranchConditional(cond, first_if_block, merge_block);
+
+                current_block = merge_block;
+                break;
+            }
+            case StatementType::Loop: {
+                Block* const loop_header_block{block_pool.Create(inst_pool)};
+                if (current_block) {
+                    IREmitter{*current_block}.Branch(loop_header_block);
+                }
+                block_list.push_back(loop_header_block);
+
+                Block* const new_continue_block{block_pool.Create(inst_pool)};
+                Block* const merge_block{MergeBlock(parent, stmt)};
+
+                // Visit children
+                const size_t first_block_index{block_list.size()};
+                Visit(stmt, new_continue_block, merge_block);
+
+                // The continue block is located at the end of the loop
+                block_list.push_back(new_continue_block);
+
+                // Implement loop header block
+                Block* const first_loop_block{block_list.at(first_block_index)};
+                IREmitter ir{*loop_header_block};
+                ir.LoopMerge(merge_block, new_continue_block);
+                ir.Branch(first_loop_block);
+
+                // Implement continue block
+                IREmitter continue_ir{*new_continue_block};
+                const U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)};
+                continue_ir.BranchConditional(continue_cond, ir.block, merge_block);
+
+                current_block = merge_block;
+                break;
+            }
+            case StatementType::Break: {
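+                // A break lowers to a conditional branch to the enclosing
+                // loop's merge block; otherwise control continues in a fresh
+                // skip block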
+                if (!current_block) {
+                    current_block = block_pool.Create(inst_pool);
+                    block_list.push_back(current_block);
+                }
+                Block* const skip_block{MergeBlock(parent, stmt)};
+
+                IREmitter ir{*current_block};
+                ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block);
+
+                current_block = skip_block;
+                break;
+            }
+            case StatementType::Return: {
+                if (!current_block) {
+                    current_block = block_pool.Create(inst_pool);
+                    block_list.push_back(current_block);
+                }
+                IREmitter{*current_block}.Return();
+                current_block = nullptr;
+                break;
+            }
+            default:
+                throw NotImplementedException("Statement type {}", stmt.type);
+            }
+        }
+        if (current_block && continue_block) {
+            IREmitter ir{*current_block};
+            ir.Branch(continue_block);
+        }
+    }
+
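+    // Returns the block where control continues after the given statement,
+    // reusing a following code block when one is reachable without branching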
+    Block* MergeBlock(Statement& parent, Statement& stmt) {
+        if (Block* const block{TryFindForwardBlock(stmt)}) {
+            return block;
+        }
+        // Create a merge block we can visit later
+        Block* const block{block_pool.Create(inst_pool)};
+        Statement* const merge_stmt{stmt_pool.Create(block, &parent)};
+        parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
+        return block;
+    }
+
+    ObjectPool<Statement, 64>& stmt_pool;
+    ObjectPool<Inst>& inst_pool;
+    ObjectPool<Block>& block_pool;
+    const std::function<void(IR::Block*)>& func;
+    BlockList& block_list;
+};
+} // Anonymous namespace
+
+BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool,
+                   std::span<Block* const> unordered_blocks,
+                   const std::function<void(Block*)>& func) {
+    ObjectPool<Statement, 64> stmt_pool;
+    GotoPass goto_pass{unordered_blocks, stmt_pool};
+    BlockList block_list;
+    TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(),
+                                 func,      block_list};
+    return block_list;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.h b/src/shader_recompiler/frontend/ir/structured_control_flow.h
new file mode 100644
index 0000000000..a574c24f77
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/structured_control_flow.h
@@ -0,0 +1,22 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <span>
+
+#include <boost/intrusive/list.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::IR {
+
+[[nodiscard]] BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool,
+                                 std::span<Block* const> unordered_blocks,
+                                 const std::function<void(Block*)>& func);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index 21ee981371..e766b555bd 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -17,38 +17,49 @@
 #include "shader_recompiler/frontend/maxwell/location.h"
 
 namespace Shader::Maxwell::Flow {
+namespace {
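+// Allows heterogeneous lookups by Location in the intrusive set of blocks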
+struct Compare {
+    bool operator()(const Block& lhs, Location rhs) const noexcept {
+        return lhs.begin < rhs;
+    }
+
+    bool operator()(Location lhs, const Block& rhs) const noexcept {
+        return lhs < rhs.begin;
+    }
+
+    bool operator()(const Block& lhs, const Block& rhs) const noexcept {
+        return lhs.begin < rhs.begin;
+    }
+};
+} // Anonymous namespace
 
 static u32 BranchOffset(Location pc, Instruction inst) {
     return pc.Offset() + inst.branch.Offset() + 8;
 }
 
-static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) {
-    if (pc <= block.begin || pc >= block.end) {
+static void Split(Block* old_block, Block* new_block, Location pc) {
+    if (pc <= old_block->begin || pc >= old_block->end) {
         throw InvalidArgument("Invalid address to split={}", pc);
     }
-    return {
-        Block{
-            .begin{block.begin},
-            .end{pc},
-            .end_class{EndClass::Branch},
-            .id{block.id},
-            .stack{block.stack},
-            .cond{true},
-            .branch_true{new_id},
-            .branch_false{UNREACHABLE_BLOCK_ID},
-            .imm_predecessors{},
-        },
-        Block{
-            .begin{pc},
-            .end{block.end},
-            .end_class{block.end_class},
-            .id{new_id},
-            .stack{std::move(block.stack)},
-            .cond{block.cond},
-            .branch_true{block.branch_true},
-            .branch_false{block.branch_false},
-            .imm_predecessors{},
-        },
+    *new_block = Block{
+        .begin{pc},
+        .end{old_block->end},
+        .end_class{old_block->end_class},
+        .stack{old_block->stack},
+        .cond{old_block->cond},
+        .branch_true{old_block->branch_true},
+        .branch_false{old_block->branch_false},
+        .ir{nullptr},
+    };
+    *old_block = Block{
+        .begin{old_block->begin},
+        .end{pc},
+        .end_class{EndClass::Branch},
+        .stack{std::move(old_block->stack)},
+        .cond{IR::Condition{true}},
+        .branch_true{new_block},
+        .branch_false{nullptr},
+        .ir{nullptr},
     };
 }
 
@@ -112,7 +123,7 @@ static bool HasFlowTest(Opcode opcode) {
 
 static std::string NameOf(const Block& block) {
     if (block.begin.IsVirtual()) {
-        return fmt::format("\"Virtual {}\"", block.id);
+        return fmt::format("\"Virtual {}\"", block.begin);
     } else {
         return fmt::format("\"{}\"", block.begin);
     }
@@ -158,126 +169,23 @@ bool Block::Contains(Location pc) const noexcept {
 Function::Function(Location start_address)
     : entrypoint{start_address}, labels{{
                                      .address{start_address},
-                                     .block_id{0},
+                                     .block{nullptr},
                                      .stack{},
                                  }} {}
 
-void Function::BuildBlocksMap() {
-    const size_t num_blocks{NumBlocks()};
-    blocks_map.resize(num_blocks);
-    for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
-        Block& block{blocks_data[block_index]};
-        blocks_map[block.id] = &block;
-    }
-}
-
-void Function::BuildImmediatePredecessors() {
-    for (const Block& block : blocks_data) {
-        if (block.branch_true != UNREACHABLE_BLOCK_ID) {
-            blocks_map[block.branch_true]->imm_predecessors.push_back(block.id);
-        }
-        if (block.branch_false != UNREACHABLE_BLOCK_ID) {
-            blocks_map[block.branch_false]->imm_predecessors.push_back(block.id);
-        }
-    }
-}
-
-void Function::BuildPostOrder() {
-    boost::container::small_vector<BlockId, 0x110> block_stack;
-    post_order_map.resize(NumBlocks());
-
-    Block& first_block{blocks_data[blocks.front()]};
-    first_block.post_order_visited = true;
-    block_stack.push_back(first_block.id);
-
-    const auto visit_branch = [&](BlockId block_id, BlockId branch_id) {
-        if (branch_id == UNREACHABLE_BLOCK_ID) {
-            return false;
-        }
-        if (blocks_map[branch_id]->post_order_visited) {
-            return false;
-        }
-        blocks_map[branch_id]->post_order_visited = true;
-
-        // Calling push_back twice is faster than insert on msvc
-        block_stack.push_back(block_id);
-        block_stack.push_back(branch_id);
-        return true;
-    };
-    while (!block_stack.empty()) {
-        const Block* const block{blocks_map[block_stack.back()]};
-        block_stack.pop_back();
-
-        if (!visit_branch(block->id, block->branch_true) &&
-            !visit_branch(block->id, block->branch_false)) {
-            post_order_map[block->id] = static_cast<u32>(post_order_blocks.size());
-            post_order_blocks.push_back(block->id);
-        }
-    }
-}
-
-void Function::BuildImmediateDominators() {
-    auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })};
-    auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id};
-    auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })};
-    auto intersect{[this](Block* finger1, Block* finger2) {
-        while (finger1 != finger2) {
-            while (post_order_map[finger1->id] < post_order_map[finger2->id]) {
-                finger1 = finger1->imm_dominator;
-            }
-            while (post_order_map[finger2->id] < post_order_map[finger1->id]) {
-                finger2 = finger2->imm_dominator;
-            }
-        }
-        return finger1;
-    }};
-    for (Block& block : blocks_data) {
-        block.imm_dominator = nullptr;
-    }
-    Block* const start_block{&blocks_data[blocks.front()]};
-    start_block->imm_dominator = start_block;
-
-    bool changed{true};
-    while (changed) {
-        changed = false;
-        for (Block* const block : post_order_blocks | reverse_order_but_first) {
-            Block* new_idom{};
-            for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) {
-                new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor;
-            }
-            changed |= block->imm_dominator != new_idom;
-            block->imm_dominator = new_idom;
-        }
-    }
-}
-
-void Function::BuildDominanceFrontier() {
-    auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })};
-    auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }};
-    for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) {
-        for (Block* current : block.imm_predecessors | transform_block_id) {
-            while (current != block.imm_dominator) {
-                current->dominance_frontiers.push_back(current->id);
-                current = current->imm_dominator;
-            }
-        }
-    }
-}
-
-CFG::CFG(Environment& env_, Location start_address) : env{env_} {
-    VisitFunctions(start_address);
-
-    for (Function& function : functions) {
-        function.BuildBlocksMap();
-        function.BuildImmediatePredecessors();
-        function.BuildPostOrder();
-        function.BuildImmediateDominators();
-        function.BuildDominanceFrontier();
-    }
-}
-
-void CFG::VisitFunctions(Location start_address) {
+CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address)
+    : env{env_}, block_pool{block_pool_} {
     functions.emplace_back(start_address);
+    functions.back().labels.back().block = block_pool.Create(Block{
+        .begin{start_address},
+        .end{start_address},
+        .end_class{EndClass::Branch},
+        .stack{},
+        .cond{IR::Condition{true}},
+        .branch_true{nullptr},
+        .branch_false{nullptr},
+        .ir{nullptr},
+    });
     for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
         while (!functions[function_id].labels.empty()) {
             Function& function{functions[function_id]};
@@ -294,35 +202,16 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
         return;
     }
     // Try to find the next block
-    Function* function{&functions[function_id]};
+    Function* const function{&functions[function_id]};
     Location pc{label.address};
-    const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc,
-                                     [function](Location pc, u32 block_index) {
-                                         return pc < function->blocks_data[block_index].begin;
-                                     })};
-    const auto next_index{std::distance(function->blocks.begin(), next)};
-    const bool is_last{next == function->blocks.end()};
-    Location next_pc;
-    BlockId next_id{UNREACHABLE_BLOCK_ID};
-    if (!is_last) {
-        next_pc = function->blocks_data[*next].begin;
-        next_id = function->blocks_data[*next].id;
-    }
+    const auto next_it{function->blocks.upper_bound(pc, Compare{})};
+    const bool is_last{next_it == function->blocks.end()};
+    Block* const next{is_last ? nullptr : &*next_it};
     // Insert before the next block
-    Block block{
-        .begin{pc},
-        .end{pc},
-        .end_class{EndClass::Branch},
-        .id{label.block_id},
-        .stack{std::move(label.stack)},
-        .cond{true},
-        .branch_true{UNREACHABLE_BLOCK_ID},
-        .branch_false{UNREACHABLE_BLOCK_ID},
-        .imm_predecessors{},
-    };
+    Block* const block{label.block};
     // Analyze instructions until it reaches an already visited block or there's a branch
     bool is_branch{false};
-    while (is_last || pc < next_pc) {
+    while (!next || pc < next->begin) {
         is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
         if (is_branch) {
             break;
@@ -332,43 +221,36 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
     if (!is_branch) {
         // If the block finished without a branch,
         // it means that the next instruction is already visited, jump to it
-        block.end = pc;
-        block.cond = true;
-        block.branch_true = next_id;
-        block.branch_false = UNREACHABLE_BLOCK_ID;
+        block->end = pc;
+        block->cond = IR::Condition{true};
+        block->branch_true = next;
+        block->branch_false = nullptr;
     }
     // Function's pointer might be invalid, resolve it again
-    function = &functions[function_id];
-    const u32 new_block_index = static_cast<u32>(function->blocks_data.size());
-    function->blocks.insert(function->blocks.begin() + next_index, new_block_index);
-    function->blocks_data.push_back(std::move(block));
+    // Insert the new block
+    functions[function_id].blocks.insert(*block);
 }
 
 bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
     const Location pc{label.address};
     Function& function{functions[function_id]};
-    const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) {
-        return function.blocks_data[block_index].Contains(pc);
-    })};
+    const auto it{
+        std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
     if (it == function.blocks.end()) {
         // Address has not been visited
         return false;
     }
-    Block& block{function.blocks_data[*it]};
-    if (block.begin == pc) {
-        throw LogicError("Dangling branch");
-    }
-    const u32 first_index{*it};
-    const u32 second_index{static_cast<u32>(function.blocks_data.size())};
-    const std::array new_indices{first_index, second_index};
-    std::array split_blocks{Split(std::move(block), pc, label.block_id)};
-    function.blocks_data[*it] = std::move(split_blocks[0]);
-    function.blocks_data.push_back(std::move(split_blocks[1]));
-    function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end());
+    Block* const visited_block{&*it};
+    if (visited_block->begin == pc) {
+        throw LogicError("Dangling block");
+    }
+    Block* const new_block{label.block};
+    Split(visited_block, new_block, pc);
+    function.blocks.insert(it, *new_block);
     return true;
 }
 
-CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) {
+CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
     const Instruction inst{env.ReadInstruction(pc.Offset())};
     const Opcode opcode{Decode(inst.raw)};
     switch (opcode) {
@@ -390,12 +272,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati
             AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode));
             break;
         case Opcode::RET:
-            block.end_class = EndClass::Return;
+            block->end_class = EndClass::Return;
             break;
         default:
             break;
         }
-        block.end = pc;
+        block->end = pc;
         return AnalysisState::Branch;
     case Opcode::BRK:
     case Opcode::CONT:
@@ -404,9 +286,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati
         if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
             return AnalysisState::Continue;
         }
-        const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))};
-        block.branch_true = AddLabel(block, new_stack, stack_pc, function_id);
-        block.end = pc;
+        const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
+        block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
+        block->end = pc;
         return AnalysisState::Branch;
     }
     case Opcode::PBK:
@@ -414,7 +296,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati
     case Opcode::PEXIT:
     case Opcode::PLONGJMP:
     case Opcode::SSY:
-        block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
+        block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
         return AnalysisState::Continue;
     case Opcode::EXIT:
         return AnalyzeEXIT(block, function_id, pc, inst);
@@ -444,51 +326,51 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati
     return AnalysisState::Branch;
 }
 
-void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc,
+void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
                           EndClass insn_end_class, IR::Condition cond) {
-    if (block.begin != pc) {
+    if (block->begin != pc) {
         // If the block doesn't start in the conditional instruction
         // mark it as a label to visit it later
-        block.end = pc;
-        block.cond = true;
-        block.branch_true = AddLabel(block, block.stack, pc, function_id);
-        block.branch_false = UNREACHABLE_BLOCK_ID;
+        block->end = pc;
+        block->cond = IR::Condition{true};
+        block->branch_true = AddLabel(block, block->stack, pc, function_id);
+        block->branch_false = nullptr;
         return;
     }
-    // Impersonate the visited block with a virtual block
-    // Jump from this virtual to the real conditional instruction and the next instruction
-    Function& function{functions[function_id]};
-    const BlockId conditional_block_id{++function.current_block_id};
-    function.blocks.push_back(static_cast<u32>(function.blocks_data.size()));
-    Block& virtual_block{function.blocks_data.emplace_back(Block{
-        .begin{}, // Virtual block
-        .end{},
+    // Create a virtual block and a conditional block
+    Block* const conditional_block{block_pool.Create()};
+    Block virtual_block{
+        .begin{block->begin.Virtual()},
+        .end{block->begin.Virtual()},
         .end_class{EndClass::Branch},
-        .id{block.id}, // Impersonating
-        .stack{block.stack},
+        .stack{block->stack},
         .cond{cond},
-        .branch_true{conditional_block_id},
-        .branch_false{UNREACHABLE_BLOCK_ID},
-        .imm_predecessors{},
-    })};
-    // Set the end properties of the conditional instruction and give it a new identity
-    Block& conditional_block{block};
-    conditional_block.end = pc;
-    conditional_block.end_class = insn_end_class;
-    conditional_block.id = conditional_block_id;
+        .branch_true{conditional_block},
+        .branch_false{nullptr},
+        .ir{nullptr},
+    };
+    // Save the contents of the visited block in the conditional block
+    *conditional_block = std::move(*block);
+    // Impersonate the visited block with a virtual block
+    *block = std::move(virtual_block);
+    // Set the end properties of the conditional instruction
+    conditional_block->end = pc;
+    conditional_block->end_class = insn_end_class;
     // Add a label to the instruction after the conditional instruction
-    const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)};
+    Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
     // Branch to the next instruction from the virtual block
-    virtual_block.branch_false = endif_block_id;
+    block->branch_false = endif_block;
     // And branch to it from the conditional instruction if it is a branch
     if (insn_end_class == EndClass::Branch) {
-        conditional_block.cond = true;
-        conditional_block.branch_true = endif_block_id;
-        conditional_block.branch_false = UNREACHABLE_BLOCK_ID;
+        conditional_block->cond = IR::Condition{true};
+        conditional_block->branch_true = endif_block;
+        conditional_block->branch_false = nullptr;
     }
+    // Finally, insert the conditional block into the list of blocks
+    functions[function_id].blocks.insert(*conditional_block);
 }
 
-bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst,
+bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
                         Opcode opcode) {
     if (inst.branch.is_cbuf) {
         throw NotImplementedException("Branch with constant buffer offset");
@@ -500,21 +382,21 @@ bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instr
     const bool has_flow_test{HasFlowTest(opcode)};
     const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
     if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
-        block.cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
-        block.branch_false = AddLabel(block, block.stack, pc + 1, function_id);
+        block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
+        block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
     } else {
-        block.cond = true;
+        block->cond = IR::Condition{true};
     }
     return true;
 }
 
-void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst,
+void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
                      bool is_absolute) {
     const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
-    block.branch_true = AddLabel(block, block.stack, bra_pc, function_id);
+    block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
 }
 
-void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) {
+void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) {
     throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX");
 }
 
@@ -528,7 +410,7 @@ void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) {
     }
 }
 
-CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc,
+CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
                                     Instruction inst) {
     const IR::FlowTest flow_test{inst.branch.flow_test};
     const Predicate pred{inst.Pred()};
@@ -537,41 +419,52 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Locati
         return AnalysisState::Continue;
     }
     if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
-        if (block.stack.Peek(Token::PEXIT).has_value()) {
+        if (block->stack.Peek(Token::PEXIT).has_value()) {
             throw NotImplementedException("Conditional EXIT with PEXIT token");
         }
         const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
         AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
         return AnalysisState::Branch;
     }
-    if (const std::optional<Location> exit_pc{block.stack.Peek(Token::PEXIT)}) {
-        const Stack popped_stack{block.stack.Remove(Token::PEXIT)};
-        block.cond = true;
-        block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
-        block.branch_false = UNREACHABLE_BLOCK_ID;
+    if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
+        const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
+        block->cond = IR::Condition{true};
+        block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
+        block->branch_false = nullptr;
         return AnalysisState::Branch;
     }
-    block.end = pc;
-    block.end_class = EndClass::Exit;
+    block->end = pc;
+    block->end_class = EndClass::Exit;
     return AnalysisState::Branch;
 }
 
-BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) {
+Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
     Function& function{functions[function_id]};
-    if (block.begin == pc) {
-        return block.id;
+    if (block->begin == pc) {
+        // Jumps to itself
+        return block;
     }
-    const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)};
-    if (target != function.blocks_data.end()) {
-        return target->id;
+    if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
+        // The block already exists and has been visited
+        return &*it;
     }
-    const BlockId block_id{++function.current_block_id};
+    // TODO: FIX DANGLING BLOCKS
+    Block* const new_block{block_pool.Create(Block{
+        .begin{pc},
+        .end{pc},
+        .end_class{EndClass::Branch},
+        .stack{stack},
+        .cond{IR::Condition{true}},
+        .branch_true{nullptr},
+        .branch_false{nullptr},
+        .ir{nullptr},
+    })};
     function.labels.push_back(Label{
         .address{pc},
-        .block_id{block_id},
+        .block{new_block},
         .stack{std::move(stack)},
     });
-    return block_id;
+    return new_block;
 }
 
 std::string CFG::Dot() const {
@@ -581,18 +474,12 @@ std::string CFG::Dot() const {
     for (const Function& function : functions) {
         dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
         dot += fmt::format("\t\tnode [style=filled];\n");
-        for (const u32 block_index : function.blocks) {
-            const Block& block{function.blocks_data[block_index]};
+        for (const Block& block : function.blocks) {
             const std::string name{NameOf(block)};
-            const auto add_branch = [&](BlockId branch_id, bool add_label) {
-                const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)};
-                dot += fmt::format("\t\t{}->", name);
-                if (it == function.blocks_data.end()) {
-                    dot += fmt::format("\"Unknown label {}\"", branch_id);
-                } else {
-                    dot += NameOf(*it);
-                };
-                if (add_label && block.cond != true && block.cond != false) {
+            const auto add_branch = [&](Block* branch, bool add_label) {
+                dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
+                if (add_label && block.cond != IR::Condition{true} &&
+                    block.cond != IR::Condition{false}) {
                     dot += fmt::format(" [label=\"{}\"]", block.cond);
                 }
                 dot += '\n';
@@ -600,10 +487,10 @@ std::string CFG::Dot() const {
             dot += fmt::format("\t\t{};\n", name);
             switch (block.end_class) {
             case EndClass::Branch:
-                if (block.cond != false) {
+                if (block.cond != IR::Condition{false}) {
                     add_branch(block.branch_true, true);
                 }
-                if (block.cond != true) {
+                if (block.cond != IR::Condition{true}) {
                     add_branch(block.branch_false, false);
                 }
                 break;
@@ -619,12 +506,6 @@ std::string CFG::Dot() const {
                                    node_uid);
                 ++node_uid;
                 break;
-            case EndClass::Unreachable:
-                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
-                dot += fmt::format(
-                    "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid);
-                ++node_uid;
-                break;
             }
         }
         if (function.entrypoint == 8) {
@@ -635,10 +516,11 @@ std::string CFG::Dot() const {
         dot += "\t}\n";
     }
     if (!functions.empty()) {
-        if (functions.front().blocks.empty()) {
+        auto& function{functions.front()};
+        if (function.blocks.empty()) {
             dot += "Start;\n";
         } else {
-            dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front()));
+            dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
         }
         dot += fmt::format("\tStart [shape=diamond];\n");
     }
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
index 49b369282a..8179787b87 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -11,25 +11,27 @@
 #include <vector>
 
 #include <boost/container/small_vector.hpp>
+#include <boost/intrusive/set.hpp>
 
 #include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/condition.h"
 #include "shader_recompiler/frontend/maxwell/instruction.h"
 #include "shader_recompiler/frontend/maxwell/location.h"
 #include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::IR {
+class Block;
+}
 
 namespace Shader::Maxwell::Flow {
 
-using BlockId = u32;
 using FunctionId = size_t;
 
-constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast<u32>(-1)};
-
 enum class EndClass {
     Branch,
     Exit,
     Return,
-    Unreachable,
 };
 
 enum class Token {
@@ -59,58 +61,37 @@ private:
     boost::container::small_vector<StackEntry, 3> entries;
 };
 
-struct Block {
+struct Block : boost::intrusive::set_base_hook<
+                   // Normal link is ~2.5% faster compared to safe link
+                   boost::intrusive::link_mode<boost::intrusive::normal_link>> {
     [[nodiscard]] bool Contains(Location pc) const noexcept;
 
+    bool operator<(const Block& rhs) const noexcept {
+        return begin < rhs.begin;
+    }
+
     Location begin;
     Location end;
     EndClass end_class;
-    BlockId id;
     Stack stack;
     IR::Condition cond;
-    BlockId branch_true;
-    BlockId branch_false;
-    boost::container::small_vector<BlockId, 4> imm_predecessors;
-    boost::container::small_vector<BlockId, 8> dominance_frontiers;
-    union {
-        bool post_order_visited{false};
-        Block* imm_dominator;
-    };
+    Block* branch_true;
+    Block* branch_false;
+    IR::Block* ir;
 };
 
 struct Label {
     Location address;
-    BlockId block_id;
+    Block* block;
     Stack stack;
 };
 
 struct Function {
     Function(Location start_address);
 
-    void BuildBlocksMap();
-
-    void BuildImmediatePredecessors();
-
-    void BuildPostOrder();
-
-    void BuildImmediateDominators();
-
-    void BuildDominanceFrontier();
-
-    [[nodiscard]] size_t NumBlocks() const noexcept {
-        return static_cast<size_t>(current_block_id) + 1;
-    }
-
     Location entrypoint;
-    BlockId current_block_id{0};
     boost::container::small_vector<Label, 16> labels;
-    boost::container::small_vector<u32, 0x130> blocks;
-    boost::container::small_vector<Block, 0x130> blocks_data;
-    // Translates from BlockId to block index
-    boost::container::small_vector<Block*, 0x130> blocks_map;
-
-    boost::container::small_vector<u32, 0x130> post_order_blocks;
-    boost::container::small_vector<BlockId, 0x130> post_order_map;
+    boost::intrusive::set<Block> blocks;
 };
 
 class CFG {
@@ -120,7 +101,7 @@ class CFG {
     };
 
 public:
-    explicit CFG(Environment& env, Location start_address);
+    explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address);
 
     CFG& operator=(const CFG&) = delete;
     CFG(const CFG&) = delete;
@@ -133,35 +114,37 @@ public:
     [[nodiscard]] std::span<const Function> Functions() const noexcept {
         return std::span(functions.data(), functions.size());
     }
+    [[nodiscard]] std::span<Function> Functions() noexcept {
+        return std::span(functions.data(), functions.size());
+    }
 
 private:
-    void VisitFunctions(Location start_address);
-
     void AnalyzeLabel(FunctionId function_id, Label& label);
 
     /// Inspect already visited blocks.
     /// Return true when the block has already been visited
     bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
 
-    AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc);
+    AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
 
-    void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class,
+    void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
                          IR::Condition cond);
 
     /// Return true when the branch instruction is confirmed to be a branch
-    bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst,
+    bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
                        Opcode opcode);
 
-    void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst,
+    void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
                     bool is_absolute);
-    void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute);
+    void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute);
     void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute);
-    AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst);
+    AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);
 
-    /// Return the branch target block id
+    /// Return the branch target block
-    BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id);
+    Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);
 
     Environment& env;
+    ObjectPool<Block>& block_pool;
     boost::container::small_vector<Function, 1> functions;
     FunctionId current_function_id{0};
 };
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
index 66b51a19e6..26d29eae2e 100644
--- a/src/shader_recompiler/frontend/maxwell/location.h
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -15,7 +15,7 @@
 namespace Shader::Maxwell {
 
 class Location {
-    static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits<u32>::max()};
+    static constexpr u32 VIRTUAL_BIAS{4};
 
 public:
     constexpr Location() = default;
@@ -27,12 +27,18 @@ public:
         Align();
     }
 
+    constexpr Location Virtual() const noexcept {
+        Location virtual_location;
+        virtual_location.offset = offset - VIRTUAL_BIAS;
+        return virtual_location;
+    }
+
     [[nodiscard]] constexpr u32 Offset() const noexcept {
         return offset;
     }
 
     [[nodiscard]] constexpr bool IsVirtual() const {
-        return offset == VIRTUAL_OFFSET;
+        return offset % 8 == VIRTUAL_BIAS;
     }
 
     constexpr auto operator<=>(const Location&) const noexcept = default;
@@ -89,7 +95,7 @@ private:
         offset -= 8 + (offset % 32 == 8 ? 8 : 0);
     }
 
-    u32 offset{VIRTUAL_OFFSET};
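+    // Default-constructed locations hold an invalid, unaligned offset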
+    u32 offset{0xcccccccc};
 };
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 8cdd20804e..9fa912ed8e 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -4,57 +4,58 @@
 
 #include <algorithm>
 #include <memory>
+#include <vector>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/structured_control_flow.h"
 #include "shader_recompiler/frontend/maxwell/program.h"
-#include "shader_recompiler/frontend/maxwell/termination_code.h"
 #include "shader_recompiler/frontend/maxwell/translate/translate.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Maxwell {
 namespace {
-void TranslateCode(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                   Environment& env, const Flow::Function& cfg_function, IR::Function& function,
-                   std::span<IR::Block*> block_map) {
+IR::BlockList TranslateCode(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+                            Environment& env, Flow::Function& cfg_function) {
     const size_t num_blocks{cfg_function.blocks.size()};
-    function.blocks.reserve(num_blocks);
-
-    for (const Flow::BlockId block_id : cfg_function.blocks) {
-        const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
-
-        IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))};
-        block_map[flow_block.id] = ir_block;
-        function.blocks.emplace_back(ir_block);
-    }
-}
-
-void EmitTerminationInsts(const Flow::Function& cfg_function,
-                          std::span<IR::Block* const> block_map) {
-    for (const Flow::BlockId block_id : cfg_function.blocks) {
-        const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
-        EmitTerminationCode(flow_block, block_map);
-    }
-}
-
-void TranslateFunction(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                       Environment& env, const Flow::Function& cfg_function,
-                       IR::Function& function) {
-    std::vector<IR::Block*> block_map;
-    block_map.resize(cfg_function.blocks_data.size());
-
-    TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map);
-    EmitTerminationInsts(cfg_function, block_map);
+    std::vector<IR::Block*> blocks(num_blocks);
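+    // First pass: create one IR block per flow block so branches can later be
+    // resolved through the cfg_block.ir back-pointers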
+    std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable {
+        const u32 begin{cfg_block.begin.Offset()};
+        const u32 end{cfg_block.end.Offset()};
+        blocks[i] = block_pool.Create(inst_pool, begin, end);
+        cfg_block.ir = blocks[i];
+        ++i;
+    });
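+    // Second pass: wire the IR blocks together following the CFG edges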
+    std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable {
+        IR::Block* const block{blocks[i]};
+        ++i;
+        if (cfg_block.end_class != Flow::EndClass::Branch) {
+            block->SetReturn();
+        } else if (cfg_block.cond == IR::Condition{true}) {
+            block->SetBranch(cfg_block.branch_true->ir);
+        } else if (cfg_block.cond == IR::Condition{false}) {
+            block->SetBranch(cfg_block.branch_false->ir);
+        } else {
+            block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir,
+                               cfg_block.branch_false->ir);
+        }
+    });
+    return IR::VisitAST(inst_pool, block_pool, blocks,
+                        [&](IR::Block* block) { Translate(env, block); });
 }
 } // Anonymous namespace
 
 IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                             Environment& env, const Flow::CFG& cfg) {
+                             Environment& env, Flow::CFG& cfg) {
     IR::Program program;
     auto& functions{program.functions};
     functions.reserve(cfg.Functions().size());
-    for (const Flow::Function& cfg_function : cfg.Functions()) {
-        TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back());
+    for (Flow::Function& cfg_function : cfg.Functions()) {
+        functions.push_back(IR::Function{
+            .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)},
+        });
     }
+
+    fmt::print(stdout, "No optimizations: {}", IR::DumpProgram(program));
     std::ranges::for_each(functions, Optimization::SsaRewritePass);
     for (IR::Function& function : functions) {
         Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function);
diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h
index 3355ab1299..542621a1de 100644
--- a/src/shader_recompiler/frontend/maxwell/program.h
+++ b/src/shader_recompiler/frontend/maxwell/program.h
@@ -19,6 +19,6 @@ namespace Shader::Maxwell {
 
 [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
                                            ObjectPool<IR::Block>& block_pool, Environment& env,
-                                           const Flow::CFG& cfg);
+                                           Flow::CFG& cfg);
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp
deleted file mode 100644
index ed5137f20c..0000000000
--- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <span>
-
-#include "shader_recompiler/exception.h"
-#include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/ir_emitter.h"
-#include "shader_recompiler/frontend/maxwell/control_flow.h"
-#include "shader_recompiler/frontend/maxwell/termination_code.h"
-
-namespace Shader::Maxwell {
-
-static void EmitExit(IR::IREmitter& ir) {
-    ir.Exit();
-}
-
-static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) {
-    switch (flow_test) {
-    case IR::FlowTest::T:
-        return ir.Imm1(true);
-    case IR::FlowTest::F:
-        return ir.Imm1(false);
-    case IR::FlowTest::NE:
-        // FIXME: Verify this
-        return ir.LogicalNot(ir.GetZFlag());
-    case IR::FlowTest::NaN:
-        // FIXME: Verify this
-        return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag());
-    default:
-        throw NotImplementedException("Flow test {}", flow_test);
-    }
-}
-
-static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) {
-    const IR::FlowTest flow_test{cond.FlowTest()};
-    const auto [pred, pred_negated]{cond.Pred()};
-    if (pred == IR::Pred::PT && !pred_negated) {
-        return GetFlowTest(flow_test, ir);
-    }
-    if (flow_test == IR::FlowTest::T) {
-        return ir.GetPred(pred, pred_negated);
-    }
-    return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir));
-}
-
-static void EmitBranch(const Flow::Block& flow_block, std::span<IR::Block* const> block_map,
-                       IR::IREmitter& ir) {
-    const auto add_immediate_predecessor = [&](Flow::BlockId label) {
-        block_map[label]->AddImmediatePredecessor(&ir.block);
-    };
-    if (flow_block.cond == true) {
-        add_immediate_predecessor(flow_block.branch_true);
-        return ir.Branch(block_map[flow_block.branch_true]);
-    }
-    if (flow_block.cond == false) {
-        add_immediate_predecessor(flow_block.branch_false);
-        return ir.Branch(block_map[flow_block.branch_false]);
-    }
-    add_immediate_predecessor(flow_block.branch_true);
-    add_immediate_predecessor(flow_block.branch_false);
-    return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true],
-                                block_map[flow_block.branch_false]);
-}
-
-void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map) {
-    IR::Block* const block{block_map[flow_block.id]};
-    IR::IREmitter ir(*block);
-    switch (flow_block.end_class) {
-    case Flow::EndClass::Branch:
-        EmitBranch(flow_block, block_map, ir);
-        break;
-    case Flow::EndClass::Exit:
-        EmitExit(ir);
-        break;
-    case Flow::EndClass::Return:
-        ir.Return();
-        break;
-    case Flow::EndClass::Unreachable:
-        ir.Unreachable();
-        break;
-    }
-}
-
-} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h
deleted file mode 100644
index 04e0445340..0000000000
--- a/src/shader_recompiler/frontend/maxwell/termination_code.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <span>
-
-#include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/maxwell/control_flow.h"
-
-namespace Shader::Maxwell {
-
-/// Emit termination instructions and collect immediate predecessors
-void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map);
-
-} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
index d4b417d14a..b752785d44 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -28,7 +28,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
     IR::U32 result;
     if (shl.w != 0) {
         // When .W is set, the shift value is wrapped
-        // To emulate this we just have to clamp it ourselves.
+        // To emulate this we just have to wrap it ourselves.
         const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
         result = v.ir.ShiftLeftLogical(base, shift);
     } else {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
index 7e6bb07a22..f1230f58fe 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -23,14 +23,13 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
     }
 }
 
-IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env,
-                    const Flow::Block& flow_block) {
-    IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()};
-    TranslatorVisitor visitor{env, block};
-
-    const Location pc_end{flow_block.end};
-    Location pc{flow_block.begin};
-    while (pc != pc_end) {
+void Translate(Environment& env, IR::Block* block) {
+    if (block->IsVirtual()) {
+        return;
+    }
+    TranslatorVisitor visitor{env, *block};
+    const Location pc_end{block->LocationEnd()};
+    for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) {
         const u64 insn{env.ReadInstruction(pc.Offset())};
         const Opcode opcode{Decode(insn)};
         switch (opcode) {
@@ -43,9 +42,7 @@ IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env,
         default:
             throw LogicError("Invalid opcode {}", opcode);
         }
-        ++pc;
     }
-    return block;
 }
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
index c1c21b2782..e1aa2e0f4b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/translate.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -6,14 +6,9 @@
 
 #include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
-#include "shader_recompiler/frontend/maxwell/control_flow.h"
-#include "shader_recompiler/frontend/maxwell/location.h"
-#include "shader_recompiler/object_pool.h"
 
 namespace Shader::Maxwell {
 
-[[nodiscard]] IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env,
-                                  const Flow::Block& flow_block);
+void Translate(Environment& env, IR::Block* block);
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f1170c61ee..9fba6ac239 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -132,6 +132,32 @@ void FoldLogicalAnd(IR::Inst& inst) {
     }
 }
 
+void FoldLogicalOr(IR::Inst& inst) {
+    if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate()) {
+        if (rhs.U1()) {
+            inst.ReplaceUsesWith(IR::Value{true});
+        } else {
+            inst.ReplaceUsesWith(inst.Arg(0));
+        }
+    }
+}
+
+void FoldLogicalNot(IR::Inst& inst) {
+    const IR::U1 value{inst.Arg(0)};
+    if (value.IsImmediate()) {
+        inst.ReplaceUsesWith(IR::Value{!value.U1()});
+        return;
+    }
+    IR::Inst* const arg{value.InstRecursive()};
+    if (arg->Opcode() == IR::Opcode::LogicalNot) {
+        inst.ReplaceUsesWith(arg->Arg(0));
+    }
+}
+
 template <typename Dest, typename Source>
 void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
     const IR::Value value{inst.Arg(0)};
@@ -160,6 +186,24 @@ void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
     inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
 }
 
+void FoldBranchConditional(IR::Inst& inst) {
+    const IR::U1 cond{inst.Arg(0)};
+    if (cond.IsImmediate()) {
+        // TODO: Convert to Branch
+        return;
+    }
+    const IR::Inst* cond_inst{cond.InstRecursive()};
+    if (cond_inst->Opcode() == IR::Opcode::LogicalNot) {
+        const IR::Value true_label{inst.Arg(1)};
+        const IR::Value false_label{inst.Arg(2)};
+        // Remove the negation from the conditional (take the operand out of the LogicalNot)
+        // and swap the branches
+        inst.SetArg(0, cond_inst->Arg(0));
+        inst.SetArg(1, false_label);
+        inst.SetArg(2, true_label);
+    }
+}
+
 void ConstantPropagation(IR::Inst& inst) {
     switch (inst.Opcode()) {
     case IR::Opcode::GetRegister:
@@ -178,6 +222,10 @@ void ConstantPropagation(IR::Inst& inst) {
         return FoldSelect<u32>(inst);
     case IR::Opcode::LogicalAnd:
         return FoldLogicalAnd(inst);
+    case IR::Opcode::LogicalOr:
+        return FoldLogicalOr(inst);
+    case IR::Opcode::LogicalNot:
+        return FoldLogicalNot(inst);
     case IR::Opcode::ULessThan:
         return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
     case IR::Opcode::BitFieldUExtract:
@@ -188,6 +236,8 @@ void ConstantPropagation(IR::Inst& inst) {
             }
             return (base >> shift) & ((1U << count) - 1);
         });
+    case IR::Opcode::BranchConditional:
+        return FoldBranchConditional(inst);
     default:
         break;
     }
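
For reference, a minimal standalone sketch of the two rewrites this pass gains
above: collapsing LogicalNot(LogicalNot(x)) to x, and un-negating a
BranchConditional by swapping its targets. Toy node types, not the
recompiler's IR:

    #include <cassert>
    #include <utility>

    // Toy stand-ins; these are not the recompiler's IR types.
    enum class Op { Value, LogicalNot, BranchConditional };

    struct Node {
        Op op{};
        Node* arg0{};      // operand (the branch condition here)
        int label_true{};  // toy branch targets
        int label_false{};
    };

    // Strip LogicalNot(LogicalNot(x)) down to x.
    Node* FoldLogicalNot(Node* node) {
        if (node->op == Op::LogicalNot && node->arg0->op == Op::LogicalNot) {
            return node->arg0->arg0;
        }
        return node;
    }

    // If the condition is a LogicalNot, take its operand and swap the targets.
    void FoldBranchConditional(Node& branch) {
        if (branch.arg0->op == Op::LogicalNot) {
            branch.arg0 = branch.arg0->arg0;
            std::swap(branch.label_true, branch.label_false);
        }
    }

    int main() {
        Node x{Op::Value};
        Node not_x{Op::LogicalNot, &x};
        Node not_not_x{Op::LogicalNot, &not_x};
        assert(FoldLogicalNot(&not_not_x) == &x);

        Node branch{Op::BranchConditional, &not_x, 1, 2};
        FoldBranchConditional(branch);
        assert(branch.arg0 == &x);
        assert(branch.label_true == 2 && branch.label_false == 1);
    }
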
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index 15a9db90a9..8ca996e935 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -34,6 +34,13 @@ struct SignFlagTag : FlagTag {};
 struct CarryFlagTag : FlagTag {};
 struct OverflowFlagTag : FlagTag {};
 
+struct GotoVariable : FlagTag {
+    GotoVariable() = default;
+    explicit GotoVariable(u32 index_) : index{index_} {}
+
+    u32 index;
+};
+
 struct DefTable {
     [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept {
         return regs[IR::RegIndex(variable)];
@@ -43,6 +50,10 @@ struct DefTable {
         return preds[IR::PredIndex(variable)];
     }
 
+    [[nodiscard]] ValueMap& operator[](GotoVariable goto_variable) {
+        return goto_vars[goto_variable.index];
+    }
+
     [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept {
         return zero_flag;
     }
@@ -61,6 +72,7 @@ struct DefTable {
 
     std::array<ValueMap, IR::NUM_USER_REGS> regs;
     std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+    boost::container::flat_map<u32, ValueMap> goto_vars;
     ValueMap zero_flag;
     ValueMap sign_flag;
     ValueMap carry_flag;
@@ -68,15 +80,15 @@ struct DefTable {
 };
 
 IR::Opcode UndefOpcode(IR::Reg) noexcept {
-    return IR::Opcode::Undef32;
+    return IR::Opcode::UndefU32;
 }
 
 IR::Opcode UndefOpcode(IR::Pred) noexcept {
-    return IR::Opcode::Undef1;
+    return IR::Opcode::UndefU1;
 }
 
 IR::Opcode UndefOpcode(const FlagTag&) noexcept {
-    return IR::Opcode::Undef1;
+    return IR::Opcode::UndefU1;
 }
 
 [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
@@ -165,6 +177,9 @@ void SsaRewritePass(IR::Function& function) {
                     pass.WriteVariable(pred, block, inst.Arg(1));
                 }
                 break;
+            case IR::Opcode::SetGotoVariable:
+                pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
+                break;
             case IR::Opcode::SetZFlag:
                 pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
                 break;
@@ -187,6 +202,9 @@ void SsaRewritePass(IR::Function& function) {
                     inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
                 }
                 break;
+            case IR::Opcode::GetGotoVariable:
+                inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
+                break;
             case IR::Opcode::GetZFlag:
                 inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
                 break;
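
A minimal sketch of the overload-based dispatch DefTable uses above: each
variable kind selects its own storage through an operator[] overload, and the
new goto variables are keyed by index in a map. Reg, GotoVariable, and
ValueMap below are toy stand-ins, not the pass's real types:

    #include <array>
    #include <cassert>
    #include <map>

    struct Reg { int index; };
    struct GotoVariable { int index; };

    using ValueMap = std::map<int, int>;  // block id -> reaching definition (toy)

    struct DefTable {
        ValueMap& operator[](Reg reg) { return regs[reg.index]; }

        // Goto variables are dynamic, so they live in a map keyed by index
        // (a boost flat_map in the real pass) instead of a fixed array.
        ValueMap& operator[](GotoVariable var) { return goto_vars[var.index]; }

        std::array<ValueMap, 255> regs;  // NUM_USER_REGS in the real table
        std::map<int, ValueMap> goto_vars;
    };

    int main() {
        DefTable defs;
        defs[GotoVariable{3}][0] = 42;  // WriteVariable(GotoVariable{3}, block 0, ...)
        defs[Reg{7}][0] = 9;            // WriteVariable(R7, block 0, ...)
        assert(defs[GotoVariable{3}].at(0) == 42);
        assert(defs[Reg{7}].at(0) == 9);
    }
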
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 8a5adf5a23..32b56eb57c 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -14,6 +14,10 @@ namespace Shader::Optimization {
 static void ValidateTypes(const IR::Function& function) {
     for (const auto& block : function.blocks) {
         for (const IR::Inst& inst : *block) {
+            if (inst.Opcode() == IR::Opcode::Phi) {
+                // Skip validation on phi nodes
+                continue;
+            }
             const size_t num_args{inst.NumArgs()};
             for (size_t i = 0; i < num_args; ++i) {
                 const IR::Type t1{inst.Arg(i).Type()};
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index 9887e066d2..3ca1677c4a 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <chrono>
 #include <filesystem>
 
 #include <fmt/format.h>
@@ -36,34 +37,46 @@ void RunDatabase() {
     ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) {
         map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str()));
     });
-    for (int i = 0; i < 300; ++i) {
+    auto block_pool{std::make_unique<ObjectPool<Flow::Block>>()};
+    auto t0 = std::chrono::high_resolution_clock::now();
+    int N = 1;
+    int n = 0;
+    for (int i = 0; i < N; ++i) {
         for (auto& env : map) {
+            ++n;
             // fmt::print(stdout, "Decoding {}\n", path.string());
+
             const Location start_address{0};
-            auto cfg{std::make_unique<Flow::CFG>(*env, start_address)};
+            block_pool->ReleaseContents();
+            Flow::CFG cfg{*env, *block_pool, start_address};
             // fmt::print(stdout, "{}\n", cfg->Dot());
             // IR::Program program{env, cfg};
             // Optimize(program);
             // const std::string code{EmitGLASM(program)};
         }
     }
+    auto t = std::chrono::high_resolution_clock::now();
+    fmt::print(stdout, "{} ms",
+               std::chrono::duration_cast<std::chrono::milliseconds>(t - t0).count() / double(N));
 }
 
 int main() {
     // RunDatabase();
 
+    auto flow_block_pool{std::make_unique<ObjectPool<Flow::Block>>()};
     auto inst_pool{std::make_unique<ObjectPool<IR::Inst>>()};
     auto block_pool{std::make_unique<ObjectPool<IR::Block>>()};
 
-    // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
-    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"};
+    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
+    // FileEnvironment env{"D:\\Shaders\\shader.bin"};
     for (int i = 0; i < 1; ++i) {
         block_pool->ReleaseContents();
         inst_pool->ReleaseContents();
-        auto cfg{std::make_unique<Flow::CFG>(env, 0)};
-        // fmt::print(stdout, "{}\n", cfg->Dot());
-        IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)};
-        // fmt::print(stdout, "{}\n", IR::DumpProgram(program));
+        flow_block_pool->ReleaseContents();
+        Flow::CFG cfg{env, *flow_block_pool, 0};
+        fmt::print(stdout, "{}\n", cfg.Dot());
+        IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)};
+        fmt::print(stdout, "{}\n", IR::DumpProgram(program));
         Backend::SPIRV::EmitSPIRV spirv{program};
     }
 }
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
new file mode 100644
index 0000000000..1760bf4a96
--- /dev/null
+++ b/src/shader_recompiler/shader_info.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include <boost/container/static_vector.hpp>
+
+namespace Shader {
+
+struct Info {
+    struct ConstantBuffer {
+
+    };
+
+    struct {
+        bool workgroup_id{};
+        bool local_invocation_id{};
+        bool fp16{};
+        bool fp64{};
+    } uses;
+
+    std::array<ConstantBuffer, 18> constant_buffers; // assumed completion of a truncated declaration
+};
+
+} // namespace Shader
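
A hypothetical sketch of how a backend might consume the `uses` flags
collected in Info to gate optional capabilities; the struct is a toy copy and
DefineCapabilities is illustrative, not code from this series:

    #include <cstdio>

    // Toy copy of Shader::Info::uses from the header above.
    struct Uses {
        bool workgroup_id{};
        bool local_invocation_id{};
        bool fp16{};
        bool fp64{};
    };

    // Hypothetical consumer: gate optional SPIR-V capabilities on collected
    // usage, in the spirit of the later "TODO: Conditionally define these".
    void DefineCapabilities(const Uses& uses) {
        if (uses.fp16) {
            std::puts("AddCapability(Float16)");
        }
        if (uses.fp64) {
            std::puts("AddCapability(Float64)");
        }
    }

    int main() {
        DefineCapabilities(Uses{.fp16 = true});
    }
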
-- 
cgit v1.2.3-70-g09d2


From 8af9297f0972d0aaa8306369c5d04926b886a89e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 14 Feb 2021 01:24:32 -0300
Subject: shader: Misc fixes

---
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |   6 ++
 src/shader_recompiler/backend/spirv/emit_spirv.h   |   5 +
 src/shader_recompiler/frontend/ir/basic_block.cpp  |   4 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |   2 +
 .../frontend/ir/microinstruction.cpp               |  16 +--
 .../maxwell/translate/impl/integer_add.cpp         |   4 +-
 .../translate/impl/integer_set_predicate.cpp       |   4 +-
 .../ir_opt/constant_propagation_pass.cpp           |  27 ++---
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  | 113 +++++++++++----------
 src/shader_recompiler/main.cpp                     |  12 +--
 10 files changed, 104 insertions(+), 89 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 5022b51597..e29e448c7c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -25,6 +25,9 @@ EmitContext::EmitContext(IR::Program& program) {
     f16.Define(*this, TypeFloat(16), "f16");
     f64.Define(*this, TypeFloat(64), "f64");
 
+    true_value = ConstantTrue(u1);
+    false_value = ConstantFalse(u1);
+
     for (const IR::Function& function : program.functions) {
         for (IR::Block* const block : function.blocks) {
             block_label_map.emplace_back(block, OpLabel());
@@ -58,6 +61,7 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) {
     std::fclose(file);
     std::system("spirv-dis shader.spv");
     std::system("spirv-val shader.spv");
+    std::system("spirv-cross shader.spv");
 }
 
 template <auto method>
@@ -109,6 +113,8 @@ static Id TypeId(const EmitContext& ctx, IR::Type type) {
     switch (type) {
     case IR::Type::U1:
         return ctx.u1;
+    case IR::Type::U32:
+        return ctx.u32[1];
     default:
         throw NotImplementedException("Phi node type {}", type);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 9aa83b5de4..46ec7a1bb8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -79,6 +79,8 @@ public:
             return def_map.Consume(value.Inst());
         }
         switch (value.Type()) {
+        case IR::Type::U1:
+            return value.U1() ? true_value : false_value;
         case IR::Type::U32:
             return Constant(u32[1], value.U32());
         case IR::Type::F32:
@@ -108,6 +110,9 @@ public:
     VectorTypes f16;
     VectorTypes f64;
 
+    Id true_value{};
+    Id false_value{};
+
     Id workgroup_id{};
     Id local_invocation_id{};
 
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index b5616f3941..c976267128 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -113,7 +113,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind
     if (arg.IsLabel()) {
         return BlockToIndex(block_to_index, arg.Label());
     }
-    if (!arg.IsImmediate()) {
+    if (!arg.IsImmediate() || arg.IsIdentity()) {
         return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
     }
     switch (arg.Type()) {
@@ -166,7 +166,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
             const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)};
             ret += arg_index != 0 ? ", " : " ";
             if (op == Opcode::Phi) {
-                ret += fmt::format("[ {}, {} ]", arg_index,
+                ret += fmt::format("[ {}, {} ]", arg_str,
                                    BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
             } else {
                 ret += arg_str;
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 30932043f9..f42489d41d 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -46,10 +46,12 @@ F64 IREmitter::Imm64(f64 value) const {
 
 void IREmitter::Branch(Block* label) {
     label->AddImmediatePredecessor(block);
+    block->SetBranch(label);
     Inst(Opcode::Branch, label);
 }
 
 void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) {
+    block->SetBranches(IR::Condition{true}, true_label, false_label);
     true_label->AddImmediatePredecessor(block);
     false_label->AddImmediatePredecessor(block);
     Inst(Opcode::BranchConditional, condition, true_label, false_label);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index b4ae371bd1..9279b96928 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -143,19 +143,21 @@ Value Inst::Arg(size_t index) const {
 }
 
 void Inst::SetArg(size_t index, Value value) {
-    if (op == Opcode::Phi) {
-        throw LogicError("Setting argument on a phi instruction");
-    }
-    if (index >= NumArgsOf(op)) {
+    if (index >= NumArgs()) {
         throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
     }
-    if (!args[index].IsImmediate()) {
-        UndoUse(args[index]);
+    const IR::Value arg{Arg(index)};
+    if (!arg.IsImmediate()) {
+        UndoUse(arg);
     }
     if (!value.IsImmediate()) {
         Use(value);
     }
-    args[index] = value;
+    if (op == Opcode::Phi) {
+        phi_args[index].second = value;
+    } else {
+        args[index] = value;
+    }
 }
 
 Block* Inst::PhiBlock(size_t index) const {
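
A toy sketch of the storage split SetArg now handles above: phi arguments
live in a growable (block, value) list while other instructions use a fixed
argument array. The real Inst overlays the two in a union; this sketch keeps
both members for simplicity:

    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    struct Block;       // predecessor blocks (opaque here)
    using Value = int;  // toy stand-in for IR::Value

    struct Inst {
        bool is_phi{};
        std::array<Value, 4> args{};                     // fixed slots (MAX_ARG_COUNT)
        std::vector<std::pair<Block*, Value>> phi_args;  // one operand per predecessor

        void SetArg(std::size_t index, Value value) {
            if (is_phi) {
                phi_args[index].second = value;  // keep the incoming block
            } else {
                args[index] = value;
            }
        }
    };

    int main() {
        Inst phi{.is_phi = true, .phi_args = {{nullptr, 0}}};
        phi.SetArg(0, 7);
        assert(phi.phi_args[0].second == 7);
    }
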
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
index 60f79b1606..623e78ff85 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -76,8 +76,8 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
 }
 } // Anonymous namespace
 
-void TranslatorVisitor::IADD_reg(u64) {
-    throw NotImplementedException("IADD (reg)");
+void TranslatorVisitor::IADD_reg(u64 insn) {
+    IADD(*this, insn, GetReg20(insn));
 }
 
 void TranslatorVisitor::IADD_cbuf(u64 insn) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
index 76c6b52910..1bc9ef3635 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -92,8 +92,8 @@ void TranslatorVisitor::ISETP_cbuf(u64 insn) {
     ISETP(*this, insn, GetCbuf(insn));
 }
 
-void TranslatorVisitor::ISETP_imm(u64) {
-    throw NotImplementedException("ISETP_imm");
+void TranslatorVisitor::ISETP_imm(u64 insn) {
+    ISETP(*this, insn, GetImm20(insn));
 }
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 9fba6ac239..cbde65b9b4 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -32,6 +32,8 @@ template <typename T>
         return value.U1();
     } else if constexpr (std::is_same_v<T, u32>) {
         return value.U32();
+    } else if constexpr (std::is_same_v<T, s32>) {
+        return static_cast<s32>(value.U32());
     } else if constexpr (std::is_same_v<T, f32>) {
         return value.F32();
     } else if constexpr (std::is_same_v<T, u64>) {
@@ -39,17 +41,8 @@ template <typename T>
     }
 }
 
-template <typename ImmFn>
+template <typename T, typename ImmFn>
 bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
-    const auto arg = [](const IR::Value& value) {
-        if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) {
-            return value.U1();
-        } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) {
-            return value.U32();
-        } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) {
-            return value.U64();
-        }
-    };
     const IR::Value lhs{inst.Arg(0)};
     const IR::Value rhs{inst.Arg(1)};
 
@@ -57,14 +50,14 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
     const bool is_rhs_immediate{rhs.IsImmediate()};
 
     if (is_lhs_immediate && is_rhs_immediate) {
-        const auto result{imm_fn(arg(lhs), arg(rhs))};
+        const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
         inst.ReplaceUsesWith(IR::Value{result});
         return false;
     }
     if (is_lhs_immediate && !is_rhs_immediate) {
         IR::Inst* const rhs_inst{rhs.InstRecursive()};
         if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
-            const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))};
+            const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
             inst.SetArg(0, rhs_inst->Arg(0));
             inst.SetArg(1, IR::Value{combined});
         } else {
@@ -76,7 +69,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
     if (!is_lhs_immediate && is_rhs_immediate) {
         const IR::Inst* const lhs_inst{lhs.InstRecursive()};
         if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
-            const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))};
+            const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
             inst.SetArg(0, lhs_inst->Arg(0));
             inst.SetArg(1, IR::Value{combined});
         }
@@ -101,7 +94,7 @@ void FoldAdd(IR::Inst& inst) {
     if (inst.HasAssociatedPseudoOperation()) {
         return;
     }
-    if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) {
+    if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
         return;
     }
     const IR::Value rhs{inst.Arg(1)};
@@ -119,7 +112,7 @@ void FoldSelect(IR::Inst& inst) {
 }
 
 void FoldLogicalAnd(IR::Inst& inst) {
-    if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) {
+    if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
         return;
     }
     const IR::Value rhs{inst.Arg(1)};
@@ -133,7 +126,7 @@ void FoldLogicalAnd(IR::Inst& inst) {
 }
 
 void FoldLogicalOr(IR::Inst& inst) {
-    if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) {
+    if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
         return;
     }
     const IR::Value rhs{inst.Arg(1)};
@@ -226,6 +219,8 @@ void ConstantPropagation(IR::Inst& inst) {
         return FoldLogicalOr(inst);
     case IR::Opcode::LogicalNot:
         return FoldLogicalNot(inst);
+    case IR::Opcode::SLessThan:
+        return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
     case IR::Opcode::ULessThan:
         return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
     case IR::Opcode::BitFieldUExtract:
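
A standalone sketch of the compile-time dispatch Arg<T> performs above,
selecting the matching immediate accessor with if constexpr; Value here is a
toy type with only the accessors the sketch needs:

    #include <cassert>
    #include <cstdint>
    #include <type_traits>

    struct Value {
        bool U1() const { return raw != 0; }
        std::uint32_t U32() const { return raw; }
        std::uint32_t raw{};
    };

    template <typename T>
    T Arg(const Value& value) {
        if constexpr (std::is_same_v<T, bool>) {
            return value.U1();
        } else if constexpr (std::is_same_v<T, std::int32_t>) {
            // Signed folds reuse the unsigned payload, as in the pass above.
            return static_cast<std::int32_t>(value.U32());
        } else {
            return value.U32();
        }
    }

    int main() {
        const Value v{0xFFFFFFFF};
        assert(Arg<bool>(v));
        assert(Arg<std::int32_t>(v) == -1);
    }
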
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index 8ca996e935..7eaf719c4e 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -113,6 +113,7 @@ private:
     IR::Value ReadVariableRecursive(auto variable, IR::Block* block) {
         IR::Value val;
         if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) {
+            // Optimize the common case of one predecessor: no phi needed
             val = ReadVariable(variable, preds.front());
         } else {
             // Break potential cycles with operandless phi
@@ -160,66 +161,70 @@ private:
 
     DefTable current_def;
 };
+
+void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::SetRegister:
+        if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
+            pass.WriteVariable(reg, block, inst.Arg(1));
+        }
+        break;
+    case IR::Opcode::SetPred:
+        if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
+            pass.WriteVariable(pred, block, inst.Arg(1));
+        }
+        break;
+    case IR::Opcode::SetGotoVariable:
+        pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
+        break;
+    case IR::Opcode::SetZFlag:
+        pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
+        break;
+    case IR::Opcode::SetSFlag:
+        pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
+        break;
+    case IR::Opcode::SetCFlag:
+        pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
+        break;
+    case IR::Opcode::SetOFlag:
+        pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
+        break;
+    case IR::Opcode::GetRegister:
+        if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
+            inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
+        }
+        break;
+    case IR::Opcode::GetPred:
+        if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
+            inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
+        }
+        break;
+    case IR::Opcode::GetGotoVariable:
+        inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
+        break;
+    case IR::Opcode::GetZFlag:
+        inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
+        break;
+    case IR::Opcode::GetSFlag:
+        inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
+        break;
+    case IR::Opcode::GetCFlag:
+        inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
+        break;
+    case IR::Opcode::GetOFlag:
+        inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
+        break;
+    default:
+        break;
+    }
+}
 } // Anonymous namespace
 
 void SsaRewritePass(IR::Function& function) {
     Pass pass;
     for (IR::Block* const block : function.blocks) {
         for (IR::Inst& inst : block->Instructions()) {
-            switch (inst.Opcode()) {
-            case IR::Opcode::SetRegister:
-                if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
-                    pass.WriteVariable(reg, block, inst.Arg(1));
-                }
-                break;
-            case IR::Opcode::SetPred:
-                if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
-                    pass.WriteVariable(pred, block, inst.Arg(1));
-                }
-                break;
-            case IR::Opcode::SetGotoVariable:
-                pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
-                break;
-            case IR::Opcode::SetZFlag:
-                pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
-                break;
-            case IR::Opcode::SetSFlag:
-                pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
-                break;
-            case IR::Opcode::SetCFlag:
-                pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
-                break;
-            case IR::Opcode::SetOFlag:
-                pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
-                break;
-            case IR::Opcode::GetRegister:
-                if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
-                    inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
-                }
-                break;
-            case IR::Opcode::GetPred:
-                if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
-                    inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
-                }
-                break;
-            case IR::Opcode::GetGotoVariable:
-                inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
-                break;
-            case IR::Opcode::GetZFlag:
-                inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
-                break;
-            case IR::Opcode::GetSFlag:
-                inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
-                break;
-            case IR::Opcode::GetCFlag:
-                inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
-                break;
-            case IR::Opcode::GetOFlag:
-                inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
-                break;
-            default:
-                break;
-            }
+            VisitInst(pass, block, inst);
         }
     }
 }
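
The single-predecessor fast path commented above is the standard shortcut in
on-the-fly SSA construction (as in Braun et al.'s algorithm): with exactly one
predecessor no values can merge, so the read recurses without creating a phi.
A toy sketch of that case only:

    #include <cassert>
    #include <map>
    #include <vector>

    struct Block {
        std::vector<Block*> preds;
    };
    using Value = int;

    std::map<const Block*, Value> current_def;  // per-variable def map (toy)

    Value ReadVariable(Block* block) {
        if (const auto it{current_def.find(block)}; it != current_def.end()) {
            return it->second;  // defined locally in this block
        }
        if (block->preds.size() == 1) {
            // One predecessor: the value cannot merge, so no phi is needed.
            return current_def[block] = ReadVariable(block->preds.front());
        }
        // Multiple predecessors would need an operandless phi here to break
        // potential cycles, as the pass above does (omitted in this sketch).
        return current_def[block] = -1;
    }

    int main() {
        Block entry{};
        Block next{{&entry}};
        current_def[&entry] = 7;
        assert(ReadVariable(&next) == 7);
    }
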
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index 3ca1677c4a..92358232c3 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -38,7 +38,8 @@ void RunDatabase() {
         map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str()));
     });
     auto block_pool{std::make_unique<ObjectPool<Flow::Block>>()};
-    auto t0 = std::chrono::high_resolution_clock::now();
+    using namespace std::chrono;
+    auto t0 = high_resolution_clock::now();
     int N = 1;
     int n = 0;
     for (int i = 0; i < N; ++i) {
@@ -55,9 +56,8 @@ void RunDatabase() {
             // const std::string code{EmitGLASM(program)};
         }
     }
-    auto t = std::chrono::high_resolution_clock::now();
-    fmt::print(stdout, "{} ms",
-               std::chrono::duration_cast<std::chrono::milliseconds>(t - t0).count() / double(N));
+    auto t = high_resolution_clock::now();
+    fmt::print(stdout, "{} ms", duration_cast<milliseconds>(t - t0).count() / double(N));
 }
 
 int main() {
@@ -67,8 +67,8 @@ int main() {
     auto inst_pool{std::make_unique<ObjectPool<IR::Inst>>()};
     auto block_pool{std::make_unique<ObjectPool<IR::Block>>()};
 
-    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
-    // FileEnvironment env{"D:\\Shaders\\shader.bin"};
+    // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
+    FileEnvironment env{"D:\\Shaders\\shader.bin"};
     for (int i = 0; i < 1; ++i) {
         block_pool->ReleaseContents();
         inst_pool->ReleaseContents();
-- 
cgit v1.2.3-70-g09d2


From 1b0cf2309c760c1cb97a230a1572f8e87f84444a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 14 Feb 2021 22:46:40 -0300
Subject: shader: Add support for forward declarations

---
 externals/sirit                                    |  2 +-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp | 61 +++++++++++++++++-----
 src/shader_recompiler/backend/spirv/emit_spirv.h   | 40 +-------------
 src/shader_recompiler/frontend/ir/basic_block.cpp  |  2 +-
 src/shader_recompiler/frontend/ir/basic_block.h    |  2 +-
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  4 +-
 .../frontend/ir/microinstruction.cpp               |  2 +-
 .../frontend/ir/microinstruction.h                 | 20 +++++--
 src/shader_recompiler/frontend/ir/modifiers.h      | 10 ++--
 .../global_memory_to_storage_buffer_pass.cpp       |  4 +-
 src/shader_recompiler/main.cpp                     |  2 +-
 11 files changed, 80 insertions(+), 69 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/externals/sirit b/externals/sirit
index c374bfd9fd..f819ade0ef 160000
--- a/externals/sirit
+++ b/externals/sirit
@@ -1 +1 @@
-Subproject commit c374bfd9fdff02a0cff85d005488967b1b0f675e
+Subproject commit f819ade0efe925a782090dea9e1bf300fedffb39
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index e29e448c7c..0895414b4d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -64,31 +64,49 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) {
     std::system("spirv-cross shader.spv");
 }
 
+template <auto method, typename... Args>
+static void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) {
+    const Id forward_id{inst->Definition<Id>()};
+    const bool has_forward_id{Sirit::ValidId(forward_id)};
+    Id current_id{};
+    if (has_forward_id) {
+        current_id = ctx.ExchangeCurrentId(forward_id);
+    }
+    const Id new_id{(emit.*method)(ctx, std::forward<Args>(args)...)};
+    if (has_forward_id) {
+        ctx.ExchangeCurrentId(current_id);
+    } else {
+        inst->SetDefinition<Id>(new_id);
+    }
+}
+
 template <auto method>
 static void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) {
     using M = decltype(method);
     using std::is_invocable_r_v;
     if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&>) {
-        ctx.Define(inst, (emit.*method)(ctx));
+        SetDefinition<method>(emit, ctx, inst);
     } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id>) {
-        ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0))));
+        SetDefinition<method>(emit, ctx, inst, ctx.Def(inst->Arg(0)));
     } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, Id>) {
-        ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))));
+        SetDefinition<method>(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)));
     } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, Id, Id>) {
-        ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)),
-                                        ctx.Def(inst->Arg(2))));
+        SetDefinition<method>(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)),
+                              ctx.Def(inst->Arg(2)));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, IR::Inst*>) {
+        SetDefinition<method>(emit, ctx, inst, inst);
     } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, IR::Inst*, Id, Id>) {
-        ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))));
+        SetDefinition<method>(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)));
     } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, IR::Inst*, Id, Id, Id>) {
-        ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)),
-                                        ctx.Def(inst->Arg(2))));
+        SetDefinition<method>(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)),
+                              ctx.Def(inst->Arg(2)));
     } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, u32>) {
-        ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), inst->Arg(1).U32()));
+        SetDefinition<method>(emit, ctx, inst, ctx.Def(inst->Arg(0)), inst->Arg(1).U32());
     } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, const IR::Value&>) {
-        ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0)));
+        SetDefinition<method>(emit, ctx, inst, inst->Arg(0));
     } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, const IR::Value&,
                                           const IR::Value&>) {
-        ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0), inst->Arg(1)));
+        SetDefinition<method>(emit, ctx, inst, inst->Arg(0), inst->Arg(1));
     } else if constexpr (is_invocable_r_v<void, M, EmitSPIRV&, EmitContext&, IR::Inst*>) {
         (emit.*method)(ctx, inst);
     } else if constexpr (is_invocable_r_v<void, M, EmitSPIRV&, EmitContext&>) {
@@ -122,11 +140,28 @@ static Id TypeId(const EmitContext& ctx, IR::Type type) {
 
 Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) {
     const size_t num_args{inst->NumArgs()};
-    boost::container::small_vector<Id, 64> operands;
+    boost::container::small_vector<Id, 32> operands;
     operands.reserve(num_args * 2);
     for (size_t index = 0; index < num_args; ++index) {
+        // Phi nodes can have forward declarations; if an argument is not yet defined, provide a
+        // forward declaration for it. Invoke will take care of giving it the right definition
+        // when it is actually defined.
+        const IR::Value arg{inst->Arg(index)};
+        Id def{};
+        if (arg.IsImmediate()) {
+            // Let the context handle immediate definitions, as it already knows how
+            def = ctx.Def(arg);
+        } else {
+            IR::Inst* const arg_inst{arg.Inst()};
+            def = arg_inst->Definition<Id>();
+            if (!Sirit::ValidId(def)) {
+                // If it hasn't been defined, get a forward declaration
+                def = ctx.ForwardDeclarationId();
+                arg_inst->SetDefinition<Id>(def);
+            }
+        }
         IR::Block* const phi_block{inst->PhiBlock(index)};
-        operands.push_back(ctx.Def(inst->Arg(index)));
+        operands.push_back(def);
         operands.push_back(ctx.BlockLabel(phi_block));
     }
     const Id result_type{TypeId(ctx, inst->Arg(0).Type())};
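
A toy sketch of the reserve-then-patch scheme above: a forward Id is handed
out when a phi needs an argument that has not been emitted yet, and the later
emission reuses that Id instead of allocating a fresh one. Module and Emit
below are illustrative stand-ins, not the Sirit API:

    #include <cassert>
    #include <cstdint>

    using Id = std::uint32_t;  // 0 plays the "invalid id" role of Sirit::ValidId

    struct Module {
        Id next_id{1};
        Id Reserve() { return next_id++; }  // ForwardDeclarationId analogue
    };

    struct Inst {
        Id definition{};  // intrusively stored host definition
    };

    // Emit `inst`, reusing a previously reserved forward Id if one exists
    // (the ExchangeCurrentId dance in SetDefinition above, simplified).
    Id Emit(Module& module, Inst& inst) {
        const Id forward_id{inst.definition};
        const Id new_id{forward_id != 0 ? forward_id : module.Reserve()};
        inst.definition = new_id;
        return new_id;
    }

    int main() {
        Module module;
        Inst producer;

        // A phi references `producer` before it is emitted: reserve an Id now.
        producer.definition = module.Reserve();
        const Id forward{producer.definition};

        // Emitting the instruction later lands on the reserved Id.
        assert(Emit(module, producer) == forward);
    }
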
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 6b09757d12..7d76377b52 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -6,8 +6,6 @@
 
 #include <sirit/sirit.h>
 
-#include <boost/container/flat_map.hpp>
-
 #include "common/common_types.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/program.h"
@@ -16,37 +14,6 @@ namespace Shader::Backend::SPIRV {
 
 using Sirit::Id;
 
-class DefMap {
-public:
-    void Define(IR::Inst* inst, Id def_id) {
-        const InstInfo info{.use_count{inst->UseCount()}, .def_id{def_id}};
-        const auto it{map.insert(map.end(), std::make_pair(inst, info))};
-        if (it == map.end()) {
-            throw LogicError("Defining already defined instruction");
-        }
-    }
-
-    [[nodiscard]] Id Consume(IR::Inst* inst) {
-        const auto it{map.find(inst)};
-        if (it == map.end()) {
-            throw LogicError("Consuming undefined instruction");
-        }
-        const Id def_id{it->second.def_id};
-        if (--it->second.use_count == 0) {
-            map.erase(it);
-        }
-        return def_id;
-    }
-
-private:
-    struct InstInfo {
-        int use_count;
-        Id def_id;
-    };
-
-    boost::container::flat_map<IR::Inst*, InstInfo> map;
-};
-
 class VectorTypes {
 public:
     void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -76,7 +43,7 @@ public:
 
     [[nodiscard]] Id Def(const IR::Value& value) {
         if (!value.IsImmediate()) {
-            return def_map.Consume(value.Inst());
+            return value.Inst()->Definition<Id>();
         }
         switch (value.Type()) {
         case IR::Type::U1:
@@ -90,10 +57,6 @@ public:
         }
     }
 
-    void Define(IR::Inst* inst, Id def_id) {
-        def_map.Define(inst, def_id);
-    }
-
     [[nodiscard]] Id BlockLabel(IR::Block* block) const {
         const auto it{std::ranges::lower_bound(block_label_map, block, {},
                                                &std::pair<IR::Block*, Id>::first)};
@@ -117,7 +80,6 @@ public:
     Id local_invocation_id{};
 
 private:
-    DefMap def_map;
     std::vector<std::pair<IR::Block*, Id>> block_label_map;
 };
 
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index c976267128..5ae91dd7dc 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -26,7 +26,7 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
 }
 
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
-                                      std::initializer_list<Value> args, u64 flags) {
+                                      std::initializer_list<Value> args, u32 flags) {
     Inst* const inst{inst_pool->Create(op, flags)};
     const auto result_it{instructions.insert(insertion_point, *inst)};
 
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index 3205705e79..778b32e432 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -42,7 +42,7 @@ public:
 
     /// Prepends a new instruction to this basic block before the insertion point.
     iterator PrependNewInst(iterator insertion_point, Opcode op,
-                            std::initializer_list<Value> args = {}, u64 flags = 0);
+                            std::initializer_list<Value> args = {}, u32 flags = 0);
 
     /// Set the branches to jump to when all instructions have executed.
     void SetBranches(Condition cond, Block* branch_true, Block* branch_false);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 4decb46bc1..24b012a393 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -178,7 +178,7 @@ private:
     }
 
     template <typename T>
-    requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags {
+    requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
         Flags() = default;
         Flags(T proxy_) : proxy{proxy_} {}
 
@@ -187,7 +187,7 @@ private:
 
     template <typename T = Value, typename FlagType, typename... Args>
     T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
-        u64 raw_flags{};
+        u32 raw_flags{};
         std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
         auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
         return T{Value{&*it}};
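
A small sketch of the flags round trip above: a trivially copyable struct no
larger than u32 is memcpy'd into the instruction's flags word and read back
through Inst::Flags. FpControl here mirrors the modifiers.h definition in
this patch:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <type_traits>

    enum class FpRounding : std::uint8_t { RN, RM, RP, RZ };

    struct FpControl {
        bool no_contraction{false};
        FpRounding rounding{FpRounding::RN};
    };
    static_assert(sizeof(FpControl) <= sizeof(std::uint32_t));
    static_assert(std::is_trivially_copyable_v<FpControl>);

    int main() {
        // Pack (IREmitter::Inst) ...
        const FpControl in{.no_contraction = true, .rounding = FpRounding::RZ};
        std::uint32_t raw{};
        std::memcpy(&raw, &in, sizeof(in));

        // ... and unpack (Inst::Flags).
        FpControl out;
        std::memcpy(&out, &raw, sizeof(out));
        assert(out.no_contraction && out.rounding == FpRounding::RZ);
    }
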
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 9279b96928..ee76db9adf 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -31,7 +31,7 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode)
     inst = nullptr;
 }
 
-Inst::Inst(IR::Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {
+Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
     if (op == Opcode::Phi) {
         std::construct_at(&phi_args);
     } else {
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index ddf0f90a9e..5b244fa0bf 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -12,6 +12,7 @@
 
 #include <boost/intrusive/list.hpp>
 
+#include "common/bit_cast.h"
 #include "common/common_types.h"
 #include "shader_recompiler/frontend/ir/opcodes.h"
 #include "shader_recompiler/frontend/ir/type.h"
@@ -25,7 +26,7 @@ constexpr size_t MAX_ARG_COUNT = 4;
 
 class Inst : public boost::intrusive::list_base_hook<> {
 public:
-    explicit Inst(Opcode op_, u64 flags_) noexcept;
+    explicit Inst(Opcode op_, u32 flags_) noexcept;
     ~Inst();
 
     Inst& operator=(const Inst&) = delete;
@@ -86,13 +87,25 @@ public:
     void ReplaceUsesWith(Value replacement);
 
     template <typename FlagsType>
-    requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>)
+    requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
         [[nodiscard]] FlagsType Flags() const noexcept {
         FlagsType ret;
         std::memcpy(&ret, &flags, sizeof(ret));
         return ret;
     }
 
+    /// Intrusively store the host definition of this instruction.
+    template <typename DefinitionType>
+    void SetDefinition(DefinitionType def) {
+        definition = Common::BitCast<u32>(def);
+    }
+
+    /// Return the intrusively stored host definition of this instruction.
+    template <typename DefinitionType>
+    [[nodiscard]] DefinitionType Definition() const noexcept {
+        return Common::BitCast<DefinitionType>(definition);
+    }
+
 private:
     struct NonTriviallyDummy {
         NonTriviallyDummy() noexcept {}
@@ -103,7 +116,8 @@ private:
 
     IR::Opcode op{};
     int use_count{};
-    u64 flags{};
+    u32 flags{};
+    u32 definition{};
     union {
         NonTriviallyDummy dummy{};
         std::array<Value, MAX_ARG_COUNT> args;
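
A sketch of the intrusive definition storage above, which replaces the side
lookups through the deleted DefMap: the backend's 32-bit handle is bit-cast in
and out of the instruction itself. std::bit_cast stands in for
Common::BitCast, and Id is a toy 32-bit handle:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Toy stand-in for the backend handle: trivially copyable, 32 bits wide.
    struct Id {
        std::uint32_t value;
    };
    static_assert(sizeof(Id) == sizeof(std::uint32_t));

    struct Inst {
        std::uint32_t definition{};

        template <typename T>
        void SetDefinition(T def) {
            definition = std::bit_cast<std::uint32_t>(def);
        }

        template <typename T>
        T Definition() const {
            return std::bit_cast<T>(definition);
        }
    };

    int main() {
        Inst inst;
        inst.SetDefinition(Id{42});
        assert(inst.Definition<Id>().value == 42);
    }
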
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 28bb9e798c..c288eede03 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -6,13 +6,13 @@
 
 namespace Shader::IR {
 
-enum class FmzMode {
+enum class FmzMode : u8 {
     None, // Denorms are not flushed, NAN is propagated (nouveau)
     FTZ,  // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
     FMZ,  // Flush denorms to zero, x * 0 == 0 (D3D9)
 };
 
-enum class FpRounding {
+enum class FpRounding : u8 {
     RN, // Round to nearest even,
     RM, // Round towards negative infinity
     RP, // Round towards positive infinity
@@ -21,8 +21,8 @@ enum class FpRounding {
 
 struct FpControl {
     bool no_contraction{false};
-    FpRounding rounding : 8 = FpRounding::RN;
-    FmzMode fmz_mode : 8 = FmzMode::FTZ;
+    FpRounding rounding{FpRounding::RN};
+    FmzMode fmz_mode{FmzMode::FTZ};
 };
-static_assert(sizeof(FpControl) <= sizeof(u64));
+static_assert(sizeof(FpControl) <= sizeof(u32));
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 34393e1d57..08fd364bb4 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -161,8 +161,8 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
             return std::nullopt;
         }
         const StorageBufferAddr storage_buffer{
-            .index = index.U32(),
-            .offset = offset.U32(),
+            .index{index.U32()},
+            .offset{offset.U32()},
         };
         if (bias && !MeetsBias(storage_buffer, *bias)) {
             // We have to blacklist some addresses in case we wrongly point to them
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index 29f65966c3..3b110af61d 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -76,5 +76,5 @@ int main() {
     fmt::print(stdout, "{}\n", cfg.Dot());
     IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)};
     fmt::print(stdout, "{}\n", IR::DumpProgram(program));
-    // Backend::SPIRV::EmitSPIRV spirv{program};
+    Backend::SPIRV::EmitSPIRV spirv{program};
 }
-- 
cgit v1.2.3-70-g09d2


From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 19 Feb 2021 18:10:18 -0300
Subject: spirv: Add lower fp16 to fp32 pass

---
 src/shader_recompiler/CMakeLists.txt               |   2 +
 .../backend/spirv/emit_context.cpp                 |  29 ++++--
 src/shader_recompiler/backend/spirv/emit_context.h |   6 +-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |   2 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   | 103 +++++++++++----------
 .../spirv/emit_spirv_bitwise_conversion.cpp        |  28 ++++--
 .../backend/spirv/emit_spirv_composite.cpp         |  48 +++++-----
 .../backend/spirv/emit_spirv_control_flow.cpp      |   2 +-
 .../backend/spirv/emit_spirv_convert.cpp           |  89 ++++++++++++++++++
 .../backend/spirv/emit_spirv_floating_point.cpp    |  48 +++++-----
 .../backend/spirv/emit_spirv_integer.cpp           |  16 ----
 .../backend/spirv/emit_spirv_logical.cpp           |  72 +-------------
 .../backend/spirv/emit_spirv_memory.cpp            |  22 +++--
 src/shader_recompiler/frontend/ir/condition.h      |   2 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  70 +++++++-------
 .../frontend/ir/microinstruction.cpp               |   4 +
 .../frontend/ir/microinstruction.h                 |   2 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |   6 +-
 src/shader_recompiler/frontend/ir/program.cpp      |   2 +-
 src/shader_recompiler/frontend/maxwell/program.cpp |   2 +
 .../impl/floating_point_conversion_integer.cpp     |  62 ++++++++++---
 .../frontend/maxwell/translate/impl/impl.h         |   2 +-
 .../ir_opt/collect_shader_info_pass.cpp            |  18 +++-
 .../ir_opt/constant_propagation_pass.cpp           |  12 +--
 .../ir_opt/lower_fp16_to_fp32.cpp                  |  79 ++++++++++++++++
 src/shader_recompiler/ir_opt/passes.h              |   1 +
 src/shader_recompiler/main.cpp                     |  10 +-
 src/shader_recompiler/object_pool.h                |   2 +-
 src/video_core/renderer_vulkan/vk_compute_pass.cpp |   3 +
 .../renderer_vulkan/vk_pipeline_cache.cpp          |   8 +-
 src/video_core/renderer_vulkan/vk_rasterizer.cpp   |   2 -
 src/video_core/vulkan_common/vulkan_device.cpp     |  10 +-
 32 files changed, 479 insertions(+), 285 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
 create mode 100644 src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index b56bdd3d9c..6047f3ebeb 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -7,6 +7,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_composite.cpp
     backend/spirv/emit_spirv_context_get_set.cpp
     backend/spirv/emit_spirv_control_flow.cpp
+    backend/spirv/emit_spirv_convert.cpp
     backend/spirv/emit_spirv_floating_point.cpp
     backend/spirv/emit_spirv_integer.cpp
     backend/spirv/emit_spirv_logical.cpp
@@ -82,6 +83,7 @@ add_library(shader_recompiler STATIC
     ir_opt/dead_code_elimination_pass.cpp
     ir_opt/global_memory_to_storage_buffer_pass.cpp
     ir_opt/identity_removal_pass.cpp
+    ir_opt/lower_fp16_to_fp32.cpp
     ir_opt/passes.h
     ir_opt/ssa_rewrite_pass.cpp
     ir_opt/verification_pass.cpp
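
The lowering pass registered here is outside this filtered diff; presumably it
rewrites each fp16 opcode to its fp32 counterpart for devices without
half-float support (an assumption from the pass name, not code shown here). A
toy sketch of that kind of opcode rewrite:

    #include <cassert>

    // Toy opcode set; the real pass maps IR opcodes, e.g. FPCeil16 -> FPCeil32.
    enum class Opcode { FPCeil16, FPCeil32, IAdd32 };

    // Map an fp16 opcode to its fp32 counterpart; leave everything else alone.
    Opcode LowerFp16(Opcode op) {
        switch (op) {
        case Opcode::FPCeil16:
            return Opcode::FPCeil32;
        default:
            return op;
        }
    }

    int main() {
        assert(LowerFp16(Opcode::FPCeil16) == Opcode::FPCeil32);
        assert(LowerFp16(Opcode::IAdd32) == Opcode::IAdd32);
    }
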
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 770067d988..ea1c8a3be1 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -30,8 +30,11 @@ EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) {
     DefineCommonTypes(program.info);
     DefineCommonConstants();
     DefineSpecialVariables(program.info);
-    DefineConstantBuffers(program.info);
-    DefineStorageBuffers(program.info);
+
+    u32 binding{};
+    DefineConstantBuffers(program.info, binding);
+    DefineStorageBuffers(program.info, binding);
+
     DefineLabels(program);
 }
 
@@ -58,6 +61,12 @@ void EmitContext::DefineCommonTypes(const Info& info) {
 
     U1 = Name(TypeBool(), "u1");
 
+    // TODO: Conditionally define these
+    AddCapability(spv::Capability::Int16);
+    AddCapability(spv::Capability::Int64);
+    U16 = Name(TypeInt(16, false), "u16");
+    U64 = Name(TypeInt(64, false), "u64");
+
     F32.Define(*this, TypeFloat(32), "f32");
     U32.Define(*this, TypeInt(32, false), "u32");
 
@@ -95,12 +104,12 @@ void EmitContext::DefineSpecialVariables(const Info& info) {
     }
 }
 
-void EmitContext::DefineConstantBuffers(const Info& info) {
+void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
     if (info.constant_buffer_descriptors.empty()) {
         return;
     }
     const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))};
-    Decorate(array_type, spv::Decoration::ArrayStride, 16U);
+    Decorate(array_type, spv::Decoration::ArrayStride, 4U);
 
     const Id struct_type{TypeStruct(array_type)};
     Name(struct_type, "cbuf_block");
@@ -111,18 +120,19 @@ void EmitContext::DefineConstantBuffers(const Info& info) {
     const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)};
     uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]);
 
-    u32 binding{};
+    u32 index{};
     for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
         const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
         Decorate(id, spv::Decoration::Binding, binding);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("c{}", desc.index));
         std::fill_n(cbufs.data() + desc.index, desc.count, id);
+        index += desc.count;
         binding += desc.count;
     }
 }
 
-void EmitContext::DefineStorageBuffers(const Info& info) {
+void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
     if (info.storage_buffers_descriptors.empty()) {
         return;
     }
@@ -140,13 +150,14 @@ void EmitContext::DefineStorageBuffers(const Info& info) {
     const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
     storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
 
-    u32 binding{};
+    u32 index{};
     for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
         const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
         Decorate(id, spv::Decoration::Binding, binding);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
-        Name(id, fmt::format("ssbo{}", binding));
-        std::fill_n(ssbos.data() + binding, desc.count, id);
+        Name(id, fmt::format("ssbo{}", index));
+        std::fill_n(ssbos.data() + index, desc.count, id);
+        index += desc.count;
         binding += desc.count;
     }
 }
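
A toy sketch of the shared binding counter threaded through both helpers
above: constant buffers and storage buffers receive consecutive descriptor
bindings in one set, while a per-kind index keeps naming local to each kind:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Descriptor {
        std::uint32_t count;
    };

    // Assign consecutive bindings to one kind of buffer, advancing the shared
    // counter; returns the first binding of each descriptor (toy version of
    // the Define*Buffers helpers).
    std::vector<std::uint32_t> Define(const std::vector<Descriptor>& descs,
                                      std::uint32_t& binding) {
        std::vector<std::uint32_t> bindings;
        for (const Descriptor& desc : descs) {
            bindings.push_back(binding);
            binding += desc.count;
        }
        return bindings;
    }

    int main() {
        std::uint32_t binding{};
        const auto cbuf_bindings{Define({{1}, {2}}, binding)};  // bindings 0, 1
        const auto ssbo_bindings{Define({{1}}, binding)};       // binding 3
        assert(cbuf_bindings == (std::vector<std::uint32_t>{0, 1}));
        assert(ssbo_bindings == (std::vector<std::uint32_t>{3}));
    }
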
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index c4b84759dc..8de203da2a 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -37,6 +37,8 @@ public:
 
     Id void_id{};
     Id U1{};
+    Id U16{};
+    Id U64{};
     VectorTypes F32;
     VectorTypes U32;
     VectorTypes F16;
@@ -59,8 +61,8 @@ private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
     void DefineSpecialVariables(const Info& info);
-    void DefineConstantBuffers(const Info& info);
-    void DefineStorageBuffers(const Info& info);
+    void DefineConstantBuffers(const Info& info, u32& binding);
+    void DefineStorageBuffers(const Info& info, u32& binding);
     void DefineLabels(IR::Program& program);
 };
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index d597184359..4ce07c2814 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -14,6 +14,8 @@
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/program.h"
 
+#pragma optimize("", off)
+
 namespace Shader::Backend::SPIRV {
 namespace {
 template <class Func>
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 5813f51ff1..2b59c0b726 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -79,26 +79,27 @@ void EmitWriteStorageU16(EmitContext& ctx);
 void EmitWriteStorageS16(EmitContext& ctx);
 void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         Id value);
-void EmitWriteStorage64(EmitContext& ctx);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                        Id value);
 void EmitWriteStorage128(EmitContext& ctx);
-void EmitCompositeConstructU32x2(EmitContext& ctx);
-void EmitCompositeConstructU32x3(EmitContext& ctx);
-void EmitCompositeConstructU32x4(EmitContext& ctx);
-void EmitCompositeExtractU32x2(EmitContext& ctx);
-Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index);
-void EmitCompositeExtractU32x4(EmitContext& ctx);
+Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
 void EmitCompositeConstructF16x2(EmitContext& ctx);
 void EmitCompositeConstructF16x3(EmitContext& ctx);
 void EmitCompositeConstructF16x4(EmitContext& ctx);
-void EmitCompositeExtractF16x2(EmitContext& ctx);
-void EmitCompositeExtractF16x3(EmitContext& ctx);
-void EmitCompositeExtractF16x4(EmitContext& ctx);
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
 void EmitCompositeConstructF32x2(EmitContext& ctx);
 void EmitCompositeConstructF32x3(EmitContext& ctx);
 void EmitCompositeConstructF32x4(EmitContext& ctx);
-void EmitCompositeExtractF32x2(EmitContext& ctx);
-void EmitCompositeExtractF32x3(EmitContext& ctx);
-void EmitCompositeExtractF32x4(EmitContext& ctx);
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
 void EmitCompositeConstructF64x2(EmitContext& ctx);
 void EmitCompositeConstructF64x3(EmitContext& ctx);
 void EmitCompositeConstructF64x4(EmitContext& ctx);
@@ -116,11 +117,13 @@ void EmitBitCastF16U16(EmitContext& ctx);
 Id EmitBitCastF32U32(EmitContext& ctx, Id value);
 void EmitBitCastF64U64(EmitContext& ctx);
 void EmitPackUint2x32(EmitContext& ctx);
-void EmitUnpackUint2x32(EmitContext& ctx);
-void EmitPackFloat2x16(EmitContext& ctx);
-void EmitUnpackFloat2x16(EmitContext& ctx);
-void EmitPackDouble2x32(EmitContext& ctx);
-void EmitUnpackDouble2x32(EmitContext& ctx);
+Id EmitUnpackUint2x32(EmitContext& ctx, Id value);
+Id EmitPackFloat2x16(EmitContext& ctx, Id value);
+Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
+Id EmitPackHalf2x16(EmitContext& ctx, Id value);
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
+Id EmitPackDouble2x32(EmitContext& ctx, Id value);
+Id EmitUnpackDouble2x32(EmitContext& ctx, Id value);
 void EmitGetZeroFromOp(EmitContext& ctx);
 void EmitGetSignFromOp(EmitContext& ctx);
 void EmitGetCarryFromOp(EmitContext& ctx);
@@ -159,18 +162,18 @@ void EmitFPLog2(EmitContext& ctx);
 void EmitFPSaturate16(EmitContext& ctx);
 void EmitFPSaturate32(EmitContext& ctx);
 void EmitFPSaturate64(EmitContext& ctx);
-void EmitFPRoundEven16(EmitContext& ctx);
-void EmitFPRoundEven32(EmitContext& ctx);
-void EmitFPRoundEven64(EmitContext& ctx);
-void EmitFPFloor16(EmitContext& ctx);
-void EmitFPFloor32(EmitContext& ctx);
-void EmitFPFloor64(EmitContext& ctx);
-void EmitFPCeil16(EmitContext& ctx);
-void EmitFPCeil32(EmitContext& ctx);
-void EmitFPCeil64(EmitContext& ctx);
-void EmitFPTrunc16(EmitContext& ctx);
-void EmitFPTrunc32(EmitContext& ctx);
-void EmitFPTrunc64(EmitContext& ctx);
+Id EmitFPRoundEven16(EmitContext& ctx, Id value);
+Id EmitFPRoundEven32(EmitContext& ctx, Id value);
+Id EmitFPRoundEven64(EmitContext& ctx, Id value);
+Id EmitFPFloor16(EmitContext& ctx, Id value);
+Id EmitFPFloor32(EmitContext& ctx, Id value);
+Id EmitFPFloor64(EmitContext& ctx, Id value);
+Id EmitFPCeil16(EmitContext& ctx, Id value);
+Id EmitFPCeil32(EmitContext& ctx, Id value);
+Id EmitFPCeil64(EmitContext& ctx, Id value);
+Id EmitFPTrunc16(EmitContext& ctx, Id value);
+Id EmitFPTrunc32(EmitContext& ctx, Id value);
+Id EmitFPTrunc64(EmitContext& ctx, Id value);
 Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 void EmitIAdd64(EmitContext& ctx);
 Id EmitISub32(EmitContext& ctx, Id a, Id b);
@@ -201,25 +204,25 @@ void EmitLogicalOr(EmitContext& ctx);
 void EmitLogicalAnd(EmitContext& ctx);
 void EmitLogicalXor(EmitContext& ctx);
 void EmitLogicalNot(EmitContext& ctx);
-void EmitConvertS16F16(EmitContext& ctx);
-void EmitConvertS16F32(EmitContext& ctx);
-void EmitConvertS16F64(EmitContext& ctx);
-void EmitConvertS32F16(EmitContext& ctx);
-void EmitConvertS32F32(EmitContext& ctx);
-void EmitConvertS32F64(EmitContext& ctx);
-void EmitConvertS64F16(EmitContext& ctx);
-void EmitConvertS64F32(EmitContext& ctx);
-void EmitConvertS64F64(EmitContext& ctx);
-void EmitConvertU16F16(EmitContext& ctx);
-void EmitConvertU16F32(EmitContext& ctx);
-void EmitConvertU16F64(EmitContext& ctx);
-void EmitConvertU32F16(EmitContext& ctx);
-void EmitConvertU32F32(EmitContext& ctx);
-void EmitConvertU32F64(EmitContext& ctx);
-void EmitConvertU64F16(EmitContext& ctx);
-void EmitConvertU64F32(EmitContext& ctx);
-void EmitConvertU64F64(EmitContext& ctx);
-void EmitConvertU64U32(EmitContext& ctx);
-void EmitConvertU32U64(EmitContext& ctx);
+Id EmitConvertS16F16(EmitContext& ctx, Id value);
+Id EmitConvertS16F32(EmitContext& ctx, Id value);
+Id EmitConvertS16F64(EmitContext& ctx, Id value);
+Id EmitConvertS32F16(EmitContext& ctx, Id value);
+Id EmitConvertS32F32(EmitContext& ctx, Id value);
+Id EmitConvertS32F64(EmitContext& ctx, Id value);
+Id EmitConvertS64F16(EmitContext& ctx, Id value);
+Id EmitConvertS64F32(EmitContext& ctx, Id value);
+Id EmitConvertS64F64(EmitContext& ctx, Id value);
+Id EmitConvertU16F16(EmitContext& ctx, Id value);
+Id EmitConvertU16F32(EmitContext& ctx, Id value);
+Id EmitConvertU16F64(EmitContext& ctx, Id value);
+Id EmitConvertU32F16(EmitContext& ctx, Id value);
+Id EmitConvertU32F32(EmitContext& ctx, Id value);
+Id EmitConvertU32F64(EmitContext& ctx, Id value);
+Id EmitConvertU64F16(EmitContext& ctx, Id value);
+Id EmitConvertU64F32(EmitContext& ctx, Id value);
+Id EmitConvertU64F64(EmitContext& ctx, Id value);
+Id EmitConvertU64U32(EmitContext& ctx, Id value);
+Id EmitConvertU32U64(EmitContext& ctx, Id value);
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index 49c2004987..e0d1ba413f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -34,24 +34,32 @@ void EmitPackUint2x32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitUnpackUint2x32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitUnpackUint2x32(EmitContext& ctx, Id value) {
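+    // A U64 and a U32x2 share the same 64-bit pattern, so unpacking is a plain bitcast.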
+    return ctx.OpBitcast(ctx.U32[2], value);
 }
 
-void EmitPackFloat2x16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitPackFloat2x16(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.U32[1], value);
 }
 
-void EmitUnpackFloat2x16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.F16[2], value);
 }
 
-void EmitPackDouble2x32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
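+    // Unlike the bitcast-based PackFloat2x16 above, PackHalf2x16 converts:
+    // it narrows an F32x2 to two half-floats packed into a single U32.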
+    return ctx.OpPackHalf2x16(ctx.U32[1], value);
 }
 
-void EmitUnpackDouble2x32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
+    return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
+}
+
+Id EmitPackDouble2x32(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.F64[1], value);
+}
+
+Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.U32[2], value);
 }
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
index 348e4796d5..c950854a00 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -6,28 +6,28 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitCompositeConstructU32x2(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
+    return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
 }
 
-void EmitCompositeConstructU32x3(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
+    return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3);
 }
 
-void EmitCompositeConstructU32x4(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
+    return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4);
 }
 
-void EmitCompositeExtractU32x2(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
 }
 
-Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) {
-    return ctx.OpCompositeExtract(ctx.U32[1], vector, index);
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
 }
 
-void EmitCompositeExtractU32x4(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
 }
 
 void EmitCompositeConstructF16x2(EmitContext&) {
@@ -42,16 +42,16 @@ void EmitCompositeConstructF16x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitCompositeExtractF16x2(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
 }
 
-void EmitCompositeExtractF16x3(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
 }
 
-void EmitCompositeExtractF16x4(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
 }
 
 void EmitCompositeConstructF32x2(EmitContext&) {
@@ -66,16 +66,16 @@ void EmitCompositeConstructF32x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitCompositeExtractF32x2(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
 }
 
-void EmitCompositeExtractF32x3(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
 }
 
-void EmitCompositeExtractF32x4(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
 }
 
 void EmitCompositeConstructF64x2(EmitContext&) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 6c4199664f..48755b8278 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -11,7 +11,7 @@ void EmitBranch(EmitContext& ctx, IR::Block* label) {
 }
 
 void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
-                                      IR::Block* false_label) {
+                           IR::Block* false_label) {
     ctx.OpBranchConditional(condition, true_label->Definition<Id>(), false_label->Definition<Id>());
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
new file mode 100644
index 0000000000..76ccaffcef
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -0,0 +1,89 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitConvertS16F16(EmitContext& ctx, Id value) {
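+    // Registers are 32 bits wide, so the 16-bit result is computed in U16 and
+    // then zero-extended to U32 with OpUConvert.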
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+}
+
+Id EmitConvertS16F32(EmitContext& ctx, Id value) {
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+}
+
+Id EmitConvertS16F64(EmitContext& ctx, Id value) {
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+}
+
+Id EmitConvertS32F16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToS(ctx.U32[1], value);
+}
+
+Id EmitConvertS32F32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToS(ctx.U32[1], value);
+}
+
+Id EmitConvertS32F64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToS(ctx.U32[1], value);
+}
+
+Id EmitConvertS64F16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertS64F32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertS64F64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertU16F16(EmitContext& ctx, Id value) {
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+}
+
+Id EmitConvertU16F32(EmitContext& ctx, Id value) {
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+}
+
+Id EmitConvertU16F64(EmitContext& ctx, Id value) {
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+}
+
+Id EmitConvertU32F16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU32F32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU32F64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU64F16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64F32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64F64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64U32(EmitContext& ctx, Id value) {
+    return ctx.OpUConvert(ctx.U64, value);
+}
+
+Id EmitConvertU32U64(EmitContext& ctx, Id value) {
+    return ctx.OpUConvert(ctx.U32[1], value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index d24fbb353e..9ef1805310 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -169,52 +169,52 @@ void EmitFPSaturate64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitFPRoundEven16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
+    return ctx.OpRoundEven(ctx.F16[1], value);
 }
 
-void EmitFPRoundEven32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPRoundEven32(EmitContext& ctx, Id value) {
+    return ctx.OpRoundEven(ctx.F32[1], value);
 }
 
-void EmitFPRoundEven64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPRoundEven64(EmitContext& ctx, Id value) {
+    return ctx.OpRoundEven(ctx.F64[1], value);
 }
 
-void EmitFPFloor16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPFloor16(EmitContext& ctx, Id value) {
+    return ctx.OpFloor(ctx.F16[1], value);
 }
 
-void EmitFPFloor32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPFloor32(EmitContext& ctx, Id value) {
+    return ctx.OpFloor(ctx.F32[1], value);
 }
 
-void EmitFPFloor64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPFloor64(EmitContext& ctx, Id value) {
+    return ctx.OpFloor(ctx.F64[1], value);
 }
 
-void EmitFPCeil16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPCeil16(EmitContext& ctx, Id value) {
+    return ctx.OpCeil(ctx.F16[1], value);
 }
 
-void EmitFPCeil32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPCeil32(EmitContext& ctx, Id value) {
+    return ctx.OpCeil(ctx.F32[1], value);
 }
 
-void EmitFPCeil64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPCeil64(EmitContext& ctx, Id value) {
+    return ctx.OpCeil(ctx.F64[1], value);
 }
 
-void EmitFPTrunc16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPTrunc16(EmitContext& ctx, Id value) {
+    return ctx.OpTrunc(ctx.F16[1], value);
 }
 
-void EmitFPTrunc32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPTrunc32(EmitContext& ctx, Id value) {
+    return ctx.OpTrunc(ctx.F32[1], value);
 }
 
-void EmitFPTrunc64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPTrunc64(EmitContext& ctx, Id value) {
+    return ctx.OpTrunc(ctx.F64[1], value);
 }
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index a1d16b81e4..22117a4ee9 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -113,20 +113,4 @@ Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs);
 }
 
-void EmitLogicalOr(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitLogicalAnd(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitLogicalXor(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitLogicalNot(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
index ff2f4fb744..c5a07252f1 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
@@ -6,83 +6,19 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitConvertS16F16(EmitContext&) {
+void EmitLogicalOr(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitConvertS16F32(EmitContext&) {
+void EmitLogicalAnd(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitConvertS16F64(EmitContext&) {
+void EmitLogicalXor(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitConvertS32F16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertS32F32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertS32F64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertS64F16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertS64F32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertS64F64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU16F16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU16F32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU16F64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU32F16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU32F32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU32F64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU64F16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU64F32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU64F64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU64U32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitConvertU32U64(EmitContext&) {
+void EmitLogicalNot(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
index 77d698ffd4..808c1b4016 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -94,8 +94,7 @@ void EmitLoadStorageS16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding,
-                                const IR::Value& offset) {
+Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (!binding.IsImmediate()) {
         throw NotImplementedException("Dynamic storage buffer indexing");
     }
@@ -129,8 +128,8 @@ void EmitWriteStorageS16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding,
-                                   const IR::Value& offset, Id value) {
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                        Id value) {
     if (!binding.IsImmediate()) {
         throw NotImplementedException("Dynamic storage buffer indexing");
     }
@@ -140,8 +139,19 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding,
     ctx.OpStore(pointer, value);
 }
 
-void EmitWriteStorage64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                        Id value) {
+    if (!binding.IsImmediate()) {
+        throw NotImplementedException("Dynamic storage buffer indexing");
+    }
+    // TODO: Support reinterpreting bindings, guaranteed to be aligned
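+    // The SSBO is declared as an array of u32, so the 64-bit store is emulated
+    // with two consecutive 32-bit stores at word indices n and n + 1.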
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id low_index{StorageIndex(ctx, offset, sizeof(u32))};
+    const Id high_index{ctx.OpIAdd(ctx.U32[1], low_index, ctx.Constant(ctx.U32[1], 1U))};
+    const Id low_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, low_index)};
+    const Id high_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, high_index)};
+    ctx.OpStore(low_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
+    ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
 }
 
 void EmitWriteStorage128(EmitContext&) {
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
index 16b4ae888e..51c2f15cf5 100644
--- a/src/shader_recompiler/frontend/ir/condition.h
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -4,8 +4,8 @@
 
 #pragma once
 
-#include <string>
 #include <compare>
+#include <string>
 
 #include <fmt/format.h>
 
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index f42489d41d..559ab9cca6 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -547,11 +547,11 @@ F32 IREmitter::FPSqrt(const F32& value) {
 
 F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
     switch (value.Type()) {
-    case Type::U16:
+    case Type::F16:
         return Inst<F16>(Opcode::FPSaturate16, value);
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPSaturate32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPSaturate64, value);
     default:
         ThrowInvalidType(value.Type());
@@ -560,11 +560,11 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
 
 F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) {
     switch (value.Type()) {
-    case Type::U16:
+    case Type::F16:
         return Inst<F16>(Opcode::FPRoundEven16, value);
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPRoundEven32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPRoundEven64, value);
     default:
         ThrowInvalidType(value.Type());
@@ -573,11 +573,11 @@ F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) {
 
 F16F32F64 IREmitter::FPFloor(const F16F32F64& value) {
     switch (value.Type()) {
-    case Type::U16:
+    case Type::F16:
         return Inst<F16>(Opcode::FPFloor16, value);
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPFloor32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPFloor64, value);
     default:
         ThrowInvalidType(value.Type());
@@ -586,11 +586,11 @@ F16F32F64 IREmitter::FPFloor(const F16F32F64& value) {
 
 F16F32F64 IREmitter::FPCeil(const F16F32F64& value) {
     switch (value.Type()) {
-    case Type::U16:
+    case Type::F16:
         return Inst<F16>(Opcode::FPCeil16, value);
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPCeil32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPCeil64, value);
     default:
         ThrowInvalidType(value.Type());
@@ -599,11 +599,11 @@ F16F32F64 IREmitter::FPCeil(const F16F32F64& value) {
 
 F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) {
     switch (value.Type()) {
-    case Type::U16:
+    case Type::F16:
         return Inst<F16>(Opcode::FPTrunc16, value);
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPTrunc32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPTrunc64, value);
     default:
         ThrowInvalidType(value.Type());
@@ -729,33 +729,33 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
     switch (bitsize) {
     case 16:
         switch (value.Type()) {
-        case Type::U16:
+        case Type::F16:
             return Inst<U32>(Opcode::ConvertS16F16, value);
-        case Type::U32:
+        case Type::F32:
             return Inst<U32>(Opcode::ConvertS16F32, value);
-        case Type::U64:
+        case Type::F64:
             return Inst<U32>(Opcode::ConvertS16F64, value);
         default:
             ThrowInvalidType(value.Type());
         }
     case 32:
         switch (value.Type()) {
-        case Type::U16:
+        case Type::F16:
             return Inst<U32>(Opcode::ConvertS32F16, value);
-        case Type::U32:
+        case Type::F32:
             return Inst<U32>(Opcode::ConvertS32F32, value);
-        case Type::U64:
+        case Type::F64:
             return Inst<U32>(Opcode::ConvertS32F64, value);
         default:
             ThrowInvalidType(value.Type());
         }
     case 64:
         switch (value.Type()) {
-        case Type::U16:
+        case Type::F16:
             return Inst<U64>(Opcode::ConvertS64F16, value);
-        case Type::U32:
+        case Type::F32:
             return Inst<U64>(Opcode::ConvertS64F32, value);
-        case Type::U64:
+        case Type::F64:
             return Inst<U64>(Opcode::ConvertS64F64, value);
         default:
             ThrowInvalidType(value.Type());
@@ -769,33 +769,33 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
     switch (bitsize) {
     case 16:
         switch (value.Type()) {
-        case Type::U16:
+        case Type::F16:
             return Inst<U32>(Opcode::ConvertU16F16, value);
-        case Type::U32:
+        case Type::F32:
             return Inst<U32>(Opcode::ConvertU16F32, value);
-        case Type::U64:
+        case Type::F64:
             return Inst<U32>(Opcode::ConvertU16F64, value);
         default:
             ThrowInvalidType(value.Type());
         }
     case 32:
         switch (value.Type()) {
-        case Type::U16:
+        case Type::F16:
             return Inst<U32>(Opcode::ConvertU32F16, value);
-        case Type::U32:
+        case Type::F32:
             return Inst<U32>(Opcode::ConvertU32F32, value);
-        case Type::U64:
+        case Type::F64:
             return Inst<U32>(Opcode::ConvertU32F64, value);
         default:
             ThrowInvalidType(value.Type());
         }
     case 64:
         switch (value.Type()) {
-        case Type::U16:
+        case Type::F16:
             return Inst<U64>(Opcode::ConvertU64F16, value);
-        case Type::U32:
+        case Type::F32:
             return Inst<U64>(Opcode::ConvertU64F32, value);
-        case Type::U64:
+        case Type::F64:
             return Inst<U64>(Opcode::ConvertU64F64, value);
         default:
             ThrowInvalidType(value.Type());
@@ -829,10 +829,10 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
     case 64:
         switch (value.Type()) {
         case Type::U32:
+            return Inst<U64>(Opcode::ConvertU64U32, value);
+        case Type::U64:
             // Nothing to do
             return value;
-        case Type::U64:
-            return Inst<U64>(Opcode::ConvertU64U32, value);
         default:
             break;
         }
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index ee76db9adf..d6a9be87d5 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -216,6 +216,10 @@ void Inst::ReplaceUsesWith(Value replacement) {
     }
 }
 
+void Inst::ReplaceOpcode(IR::Opcode opcode) {
+    op = opcode;
+}
+
 void Inst::Use(const Value& value) {
     Inst* const inst{value.Inst()};
     ++inst->use_count;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 5b244fa0bf..321393dd7c 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -86,6 +86,8 @@ public:
 
     void ReplaceUsesWith(Value replacement);
 
+    void ReplaceOpcode(IR::Opcode opcode);
+
     template <typename FlagsType>
     requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
         [[nodiscard]] FlagsType Flags() const noexcept {
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index ede5e20c2d..50da77535e 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -119,8 +119,10 @@ OPCODE(PackUint2x32,                                        U64,            U32x
 OPCODE(UnpackUint2x32,                                      U32x2,          U64,                                                            )
 OPCODE(PackFloat2x16,                                       U32,            F16x2,                                                          )
 OPCODE(UnpackFloat2x16,                                     F16x2,          U32,                                                            )
-OPCODE(PackDouble2x32,                                      U64,            U32x2,                                                          )
-OPCODE(UnpackDouble2x32,                                    U32x2,          U64,                                                            )
+OPCODE(PackHalf2x16,                                        U32,            F32x2,                                                          )
+OPCODE(UnpackHalf2x16,                                      F32x2,          U32,                                                            )
+OPCODE(PackDouble2x32,                                      F64,            U32x2,                                                          )
+OPCODE(UnpackDouble2x32,                                    U32x2,          F64,                                                            )
 
 // Pseudo-operation, handled specially at final emit
 OPCODE(GetZeroFromOp,                                       U1,             Opaque,                                                         )
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
index 0ce99ef2a9..8c301c3a10 100644
--- a/src/shader_recompiler/frontend/ir/program.cpp
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -35,4 +35,4 @@ std::string DumpProgram(const Program& program) {
     return ret;
 }
 
-} // namespace Shader::IR
\ No newline at end of file
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 8c44ebb29d..16cdc12e2a 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -56,6 +56,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
             .post_order_blocks{},
         });
     }
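+    // Lower fp16 operations to fp32 before SSA rewriting so downstream passes
+    // and the backend only see fp32 opcodes.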
+    Optimization::LowerFp16ToFp32(program);
     for (IR::Function& function : functions) {
         function.post_order_blocks = PostOrder(function.blocks);
         Optimization::SsaRewritePass(function.post_order_blocks);
@@ -69,6 +70,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
         Optimization::VerificationPass(function);
     }
     Optimization::CollectShaderInfoPass(program);
+
     fmt::print(stdout, "{}\n", IR::DumpProgram(program));
     return program;
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
index 3d0c48457c..ae2d37405f 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -34,7 +34,7 @@ union F2I {
     BitField<8, 2, DestFormat> dest_format;
     BitField<10, 2, SrcFormat> src_format;
     BitField<12, 1, u64> is_signed;
-    BitField<39, 1, Rounding> rounding;
+    BitField<39, 2, Rounding> rounding;
     BitField<49, 1, u64> half;
     BitField<44, 1, u64> ftz;
     BitField<45, 1, u64> abs;
@@ -55,6 +55,28 @@ size_t BitSize(DestFormat dest_format) {
     }
 }
 
+IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 14, s64> offset;
+        BitField<34, 5, u64> binding;
+    } const cbuf{insn};
+    if (cbuf.binding >= 18) {
+        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+    }
+    if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
+        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
+    }
+    if (cbuf.offset % 2 != 0) {
+        throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
+    }
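+    // Only the high word (offset + 4) is read from the constant buffer; it is
+    // paired with a zero low word and reinterpreted as an F64 by PackDouble2x32.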
+    const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
+    const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
+    const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
+    const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
+    return v.ir.PackDouble2x32(vector);
+}
+
 void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
     // F2I is used to convert from a floating point value to an integer
     const F2I f2i{insn};
@@ -82,19 +104,16 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
     const size_t bitsize{BitSize(f2i.dest_format)};
     const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)};
 
-    v.X(f2i.dest_reg, result);
+    if (bitsize == 64) {
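+        // A 64-bit result occupies a register pair: unpack the U64 into two U32
+        // halves and write them to Rd and Rd + 1.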
+        const IR::Value vector{v.ir.UnpackUint2x32(result)};
+        v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)});
+        v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)});
+    } else {
+        v.X(f2i.dest_reg, result);
+    }
 
     if (f2i.cc != 0) {
-        v.SetZFlag(v.ir.GetZeroFromOp(result));
-        if (is_signed) {
-            v.SetSFlag(v.ir.GetSignFromOp(result));
-        } else {
-            v.ResetSFlag();
-        }
-        v.ResetCFlag();
-
-        // TODO: Investigate if out of bound conversions sets the overflow flag
-        v.ResetOFlag();
+        throw NotImplementedException("F2I CC");
     }
 }
 } // Anonymous namespace
@@ -118,12 +137,25 @@ void TranslatorVisitor::F2I_reg(u64 insn) {
                                           f2i.base.src_format.Value());
         }
     }()};
-
     TranslateF2I(*this, insn, op_a);
 }
 
-void TranslatorVisitor::F2I_cbuf(u64) {
-    throw NotImplementedException("{}", Opcode::F2I_cbuf);
+void TranslatorVisitor::F2I_cbuf(u64 insn) {
+    const F2I f2i{insn};
+    const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+        switch (f2i.src_format) {
+        case SrcFormat::F16:
+            return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
+        case SrcFormat::F32:
+            return GetCbufF(insn);
+        case SrcFormat::F64: {
+            return UnpackCbuf(*this, insn);
+        }
+        default:
+            throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
+        }
+    }()};
+    TranslateF2I(*this, insn, op_a);
 }
 
 void TranslatorVisitor::F2I_imm(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 8bd468244e..27aba2cf87 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -11,7 +11,7 @@ namespace Shader::Maxwell {
 
 class TranslatorVisitor {
 public:
-    explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {}
+    explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
 
     Environment& env;
     IR::IREmitter ir;
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index f2326dea14..f7f102f533 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) {
     case IR::Opcode::LocalInvocationId:
         info.uses_local_invocation_id = true;
         break;
+    case IR::Opcode::CompositeConstructF16x2:
+    case IR::Opcode::CompositeConstructF16x3:
+    case IR::Opcode::CompositeConstructF16x4:
+    case IR::Opcode::CompositeExtractF16x2:
+    case IR::Opcode::CompositeExtractF16x3:
+    case IR::Opcode::CompositeExtractF16x4:
+    case IR::Opcode::BitCastU16F16:
+    case IR::Opcode::BitCastF16U16:
+    case IR::Opcode::PackFloat2x16:
+    case IR::Opcode::UnpackFloat2x16:
+    case IR::Opcode::ConvertS16F16:
+    case IR::Opcode::ConvertS32F16:
+    case IR::Opcode::ConvertS64F16:
+    case IR::Opcode::ConvertU16F16:
+    case IR::Opcode::ConvertU32F16:
+    case IR::Opcode::ConvertU64F16:
     case IR::Opcode::FPAbs16:
     case IR::Opcode::FPAdd16:
     case IR::Opcode::FPCeil16:
@@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPRoundEven16:
     case IR::Opcode::FPSaturate16:
     case IR::Opcode::FPTrunc16:
-        info.uses_fp16;
+        info.uses_fp16 = true;
         break;
     case IR::Opcode::FPAbs64:
     case IR::Opcode::FPAdd64:
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 9eb61b54c5..4d4e88259a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) {
 bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
     /*
      * We are looking for this pattern:
-     *   %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1)
-     *   %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1)
-     *   %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1)
-     *   %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1)
-     *   %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1)
-     *   %result  = IAdd32 %lhs_shl, %rhs_mul (uses: 10)
+     *   %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
+     *   %rhs_mul = IMul32 %rhs_bfe, %factor_b
+     *   %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
+     *   %lhs_mul = IMul32 %lhs_bfe, %factor_b
+     *   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+     *   %result  = IAdd32 %lhs_shl, %rhs_mul
      *
      * And replacing it with
      *   %result  = IMul32 %factor_a, %factor_b
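+     *
+     * The fold is sound because for 32-bit operands
+     *   (a & 0xffff) * b + (((a >> 16) * b) << 16) == a * b (mod 2^32)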
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
new file mode 100644
index 0000000000..c7032f1686
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -0,0 +1,79 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+IR::Opcode Replace(IR::Opcode op) {
+    switch (op) {
+    case IR::Opcode::FPAbs16:
+        return IR::Opcode::FPAbs32;
+    case IR::Opcode::FPAdd16:
+        return IR::Opcode::FPAdd32;
+    case IR::Opcode::FPCeil16:
+        return IR::Opcode::FPCeil32;
+    case IR::Opcode::FPFloor16:
+        return IR::Opcode::FPFloor32;
+    case IR::Opcode::FPFma16:
+        return IR::Opcode::FPFma32;
+    case IR::Opcode::FPMul16:
+        return IR::Opcode::FPMul32;
+    case IR::Opcode::FPNeg16:
+        return IR::Opcode::FPNeg32;
+    case IR::Opcode::FPRoundEven16:
+        return IR::Opcode::FPRoundEven32;
+    case IR::Opcode::FPSaturate16:
+        return IR::Opcode::FPSaturate32;
+    case IR::Opcode::FPTrunc16:
+        return IR::Opcode::FPTrunc32;
+    case IR::Opcode::CompositeConstructF16x2:
+        return IR::Opcode::CompositeConstructF32x2;
+    case IR::Opcode::CompositeConstructF16x3:
+        return IR::Opcode::CompositeConstructF32x3;
+    case IR::Opcode::CompositeConstructF16x4:
+        return IR::Opcode::CompositeConstructF32x4;
+    case IR::Opcode::CompositeExtractF16x2:
+        return IR::Opcode::CompositeExtractF32x2;
+    case IR::Opcode::CompositeExtractF16x3:
+        return IR::Opcode::CompositeExtractF32x3;
+    case IR::Opcode::CompositeExtractF16x4:
+        return IR::Opcode::CompositeExtractF32x4;
+    case IR::Opcode::ConvertS16F16:
+        return IR::Opcode::ConvertS16F32;
+    case IR::Opcode::ConvertS32F16:
+        return IR::Opcode::ConvertS32F32;
+    case IR::Opcode::ConvertS64F16:
+        return IR::Opcode::ConvertS64F32;
+    case IR::Opcode::ConvertU16F16:
+        return IR::Opcode::ConvertU16F32;
+    case IR::Opcode::ConvertU32F16:
+        return IR::Opcode::ConvertU32F32;
+    case IR::Opcode::ConvertU64F16:
+        return IR::Opcode::ConvertU64F32;
+    case IR::Opcode::PackFloat2x16:
+        return IR::Opcode::PackHalf2x16;
+    case IR::Opcode::UnpackFloat2x16:
+        return IR::Opcode::UnpackHalf2x16;
+    default:
+        return op;
+    }
+}
+} // Anonymous namespace
+
+void LowerFp16ToFp32(IR::Program& program) {
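+    // Swap opcodes in place; Replace() returns non-fp16 opcodes unchanged, so
+    // the pass is a no-op for everything else.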
+    for (IR::Function& function : program.functions) {
+        for (IR::Block* const block : function.blocks) {
+            for (IR::Inst& inst : block->Instructions()) {
+                inst.ReplaceOpcode(Replace(inst.Opcode()));
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 89e5811d3f..38106308cb 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block);
 void DeadCodeEliminationPass(IR::Block& block);
 void GlobalMemoryToStorageBufferPass(IR::Program& program);
 void IdentityRemovalPass(IR::Function& function);
+void LowerFp16ToFp32(IR::Program& program);
 void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
 void VerificationPass(const IR::Function& function);
 
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index 050a37f180..abd44e3232 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -67,8 +67,8 @@ int main() {
     ObjectPool<IR::Inst> inst_pool;
     ObjectPool<IR::Block> block_pool;
 
-    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
-    // FileEnvironment env{"D:\\Shaders\\shader.bin"};
+    // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
+    FileEnvironment env{"D:\\Shaders\\shader.bin"};
     block_pool.ReleaseContents();
     inst_pool.ReleaseContents();
     flow_block_pool.ReleaseContents();
@@ -76,5 +76,9 @@ int main() {
     fmt::print(stdout, "{}\n", cfg.Dot());
     IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)};
     fmt::print(stdout, "{}\n", IR::DumpProgram(program));
-    void(Backend::SPIRV::EmitSPIRV(env, program));
+    const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(env, program)};
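+    // Debug aid: dump the SPIR-V module to disk and disassemble it with spirv-dis.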
+    std::FILE* const file{std::fopen("D:\\shader.spv", "wb")};
+    std::fwrite(spirv.data(), sizeof(u32), spirv.size(), file);
+    std::fclose(file);
+    std::system("spirv-dis D:\\shader.spv");
 }
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
index f78813b5f2..c10751b9d1 100644
--- a/src/shader_recompiler/object_pool.h
+++ b/src/shader_recompiler/object_pool.h
@@ -18,7 +18,7 @@ public:
     }
 
     template <typename... Args>
-    requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) {
+    requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) {
         return std::construct_at(Memory(), std::forward<Args>(args)...);
     }
 
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 4181d83ee9..a444d55d38 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -206,6 +206,8 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
         .codeSize = static_cast<u32>(code.size_bytes()),
         .pCode = code.data(),
     });
+    /*
+    FIXME
     pipeline = device.GetLogical().CreateComputePipeline({
         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
         .pNext = nullptr,
@@ -224,6 +226,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
         .basePipelineHandle = nullptr,
         .basePipelineIndex = 0,
     });
+    */
 }
 
 VKComputePass::~VKComputePass() = default;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 4bf3e4819c..c2a41a3603 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -31,8 +31,6 @@
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
-#pragma optimize("", off)
-
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
@@ -180,6 +178,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
         // TODO: Load from cache
     }
     const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)};
+
+    FILE* file = fopen("D:\\shader.spv", "wb");
+    fwrite(code.data(), 4, code.size(), file);
+    fclose(file);
+    std::system("spirv-dis D:\\shader.spv");
+
     shader_info->unique_hash = env.ComputeHash();
     shader_info->size_bytes = env.ShaderSize();
     return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index b757454c49..1b662f9f3a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -36,8 +36,6 @@
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
-#pragma optimize("", off)
-
 namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f214510da6..85f903125b 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -247,9 +247,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
             .shaderStorageImageArrayDynamicIndexing = false,
             .shaderClipDistance = false,
             .shaderCullDistance = false,
-            .shaderFloat64 = false,
-            .shaderInt64 = false,
-            .shaderInt16 = false,
+            .shaderFloat64 = true,
+            .shaderInt64 = true,
+            .shaderInt16 = true,
             .shaderResourceResidency = false,
             .shaderResourceMinLod = false,
             .sparseBinding = false,
@@ -420,8 +420,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     }
     if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
         // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
-        LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
-        is_float16_supported = false;
+        // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
+        // is_float16_supported = false;
     }
 
     graphics_queue = logical.GetQueue(graphics_family);
-- 
cgit v1.2.3-70-g09d2


From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 8 Mar 2021 18:31:53 -0300
Subject: shader: Initial support for textures and TEX

---
 src/shader_recompiler/CMakeLists.txt               |   3 +
 .../backend/spirv/emit_context.cpp                 |  69 ++-
 src/shader_recompiler/backend/spirv/emit_context.h |   7 +
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  12 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  32 +-
 .../backend/spirv/emit_spirv_convert.cpp           |  48 ++
 .../backend/spirv/emit_spirv_image.cpp             | 146 ++++++
 .../backend/spirv/emit_spirv_memory.cpp            |  18 +-
 src/shader_recompiler/environment.h                |   2 +
 src/shader_recompiler/file_environment.cpp         |   4 +
 src/shader_recompiler/file_environment.h           |   4 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 133 ++++-
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  21 +-
 .../frontend/ir/microinstruction.cpp               |  73 ++-
 .../frontend/ir/microinstruction.h                 |  22 +-
 src/shader_recompiler/frontend/ir/modifiers.h      |  10 +
 src/shader_recompiler/frontend/ir/opcodes.cpp      |   2 +-
 src/shader_recompiler/frontend/ir/opcodes.inc      | 569 +++++++++++----------
 src/shader_recompiler/frontend/ir/reg.h            |  11 +
 src/shader_recompiler/frontend/ir/value.h          |   1 +
 src/shader_recompiler/frontend/maxwell/maxwell.inc |   4 +-
 src/shader_recompiler/frontend/maxwell/program.cpp |   1 +
 .../maxwell/translate/impl/not_implemented.cpp     |   8 -
 .../maxwell/translate/impl/texture_sample.cpp      | 232 +++++++++
 .../ir_opt/collect_shader_info_pass.cpp            |  19 +
 .../global_memory_to_storage_buffer_pass.cpp       |  15 +-
 src/shader_recompiler/ir_opt/passes.h              |   2 +
 src/shader_recompiler/ir_opt/texture_pass.cpp      | 199 +++++++
 src/shader_recompiler/shader_info.h                |  52 +-
 .../renderer_vulkan/vk_compute_pipeline.cpp        | 101 ++++
 .../renderer_vulkan/vk_compute_pipeline.h          |   4 +
 .../renderer_vulkan/vk_pipeline_cache.cpp          |   4 +
 src/video_core/renderer_vulkan/vk_rasterizer.cpp   |   3 +-
 33 files changed, 1489 insertions(+), 342 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
 create mode 100644 src/shader_recompiler/ir_opt/texture_pass.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index cc38b28ed6..fa268d38fc 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -9,6 +9,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_control_flow.cpp
     backend/spirv/emit_spirv_convert.cpp
     backend/spirv/emit_spirv_floating_point.cpp
+    backend/spirv/emit_spirv_image.cpp
     backend/spirv/emit_spirv_integer.cpp
     backend/spirv/emit_spirv_logical.cpp
     backend/spirv/emit_spirv_memory.cpp
@@ -100,6 +101,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/predicate_set_predicate.cpp
     frontend/maxwell/translate/impl/predicate_set_register.cpp
     frontend/maxwell/translate/impl/select_source_with_predicate.cpp
+    frontend/maxwell/translate/impl/texture_sample.cpp
     frontend/maxwell/translate/translate.cpp
     frontend/maxwell/translate/translate.h
     ir_opt/collect_shader_info_pass.cpp
@@ -110,6 +112,7 @@ add_library(shader_recompiler STATIC
     ir_opt/lower_fp16_to_fp32.cpp
     ir_opt/passes.h
     ir_opt/ssa_rewrite_pass.cpp
+    ir_opt/texture_pass.cpp
     ir_opt/verification_pass.cpp
     object_pool.h
     profile.h
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index d2dbd56d44..21900d3878 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -12,6 +12,43 @@
 #include "shader_recompiler/backend/spirv/emit_context.h"
 
 namespace Shader::Backend::SPIRV {
+namespace {
+Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
+    const spv::ImageFormat format{spv::ImageFormat::Unknown};
+    const Id type{ctx.F32[1]};
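+    // OpTypeImage operands after the sampled type: dimension, depth, arrayed,
+    // multisampled, sampled (1 = used with a sampler), and image format.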
+    switch (desc.type) {
+    case TextureType::Color1D:
+        return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format);
+    case TextureType::ColorArray1D:
+        return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format);
+    case TextureType::Color2D:
+        return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format);
+    case TextureType::ColorArray2D:
+        return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format);
+    case TextureType::Color3D:
+        return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format);
+    case TextureType::ColorCube:
+        return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format);
+    case TextureType::ColorArrayCube:
+        return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format);
+    case TextureType::Shadow1D:
+        return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format);
+    case TextureType::ShadowArray1D:
+        return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format);
+    case TextureType::Shadow2D:
+        return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format);
+    case TextureType::ShadowArray2D:
+        return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format);
+    case TextureType::Shadow3D:
+        return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format);
+    case TextureType::ShadowCube:
+        return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format);
+    case TextureType::ShadowArrayCube:
+        return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format);
+    }
+    throw InvalidArgument("Invalid texture type {}", desc.type);
+}
+} // Anonymous namespace
 
 void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
     defs[0] = sirit_ctx.Name(base_type, name);
@@ -35,6 +72,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
     u32 binding{};
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
+    DefineTextures(program.info, binding);
 
     DefineLabels(program);
 }
@@ -46,6 +84,10 @@ Id EmitContext::Def(const IR::Value& value) {
         return value.Inst()->Definition<Id>();
     }
     switch (value.Type()) {
+    case IR::Type::Void:
+        // Void instructions are used for optional arguments (e.g. texture offsets)
+        // They are not meant to be used in the SPIR-V module
+        return Id{};
     case IR::Type::U1:
         return value.U1() ? true_value : false_value;
     case IR::Type::U32:
@@ -122,7 +164,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
     uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]);
 
     u32 index{};
-    for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
+    for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
         const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
         Decorate(id, spv::Decoration::Binding, binding);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
@@ -152,7 +194,7 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
     storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
 
     u32 index{};
-    for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
+    for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
         const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
         Decorate(id, spv::Decoration::Binding, binding);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
@@ -163,6 +205,29 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
     }
 }
 
+void EmitContext::DefineTextures(const Info& info, u32& binding) {
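+    // Declare one UniformConstant sampled-image variable per descriptor and assign
+    // sequential bindings in descriptor set 0.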
+    textures.reserve(info.texture_descriptors.size());
+    for (const TextureDescriptor& desc : info.texture_descriptors) {
+        if (desc.count != 1) {
+            throw NotImplementedException("Array of textures");
+        }
+        const Id type{TypeSampledImage(ImageType(*this, desc))};
+        const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)};
+        const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
+        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::DescriptorSet, 0U);
+        Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset));
+        for (u32 index = 0; index < desc.count; ++index) {
+            // TODO: Pass count info
+            textures.push_back(TextureDefinition{
+                .id{id},
+                .type{type},
+            });
+        }
+        binding += desc.count;
+    }
+}
+
 void EmitContext::DefineLabels(IR::Program& program) {
     for (const IR::Function& function : program.functions) {
         for (IR::Block* const block : function.blocks) {
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index d20cf387ef..8b3109eb8c 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -29,6 +29,11 @@ private:
     std::array<Id, 4> defs{};
 };
 
+struct TextureDefinition {
+    Id id;
+    Id type;
+};
+
 class EmitContext final : public Sirit::Module {
 public:
     explicit EmitContext(const Profile& profile, IR::Program& program);
@@ -56,6 +61,7 @@ public:
 
     std::array<Id, Info::MAX_CBUFS> cbufs{};
     std::array<Id, Info::MAX_SSBOS> ssbos{};
+    std::vector<TextureDefinition> textures;
 
     Id workgroup_id{};
     Id local_invocation_id{};
@@ -66,6 +72,7 @@ private:
     void DefineSpecialVariables(const Info& info);
     void DefineConstantBuffers(const Info& info, u32& binding);
     void DefineStorageBuffers(const Info& info, u32& binding);
+    void DefineTextures(const Info& info, u32& binding);
     void DefineLabels(IR::Program& program);
 };
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 8097fe82dc..a94e9cb2d2 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -221,6 +221,14 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program
                          workgroup_size[2]);
 
     SetupDenormControl(profile, program, ctx, func);
+    if (info.uses_sampled_1d) {
+        ctx.AddCapability(spv::Capability::Sampled1D);
+    }
+    if (info.uses_sparse_residency) {
+        ctx.AddCapability(spv::Capability::SparseResidency);
+    }
+    // TODO: Track this usage
+    ctx.AddCapability(spv::Capability::ImageGatherExtended);
 
     return ctx.Assemble();
 }
@@ -259,4 +267,8 @@ void EmitGetOverflowFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
 }
 
+void EmitGetSparseFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 92387ca280..69698c478e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -83,7 +83,8 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va
                         Id value);
 void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         Id value);
-void EmitWriteStorage128(EmitContext& ctx);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value);
 Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
 Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
 Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
@@ -145,6 +146,7 @@ void EmitGetZeroFromOp(EmitContext& ctx);
 void EmitGetSignFromOp(EmitContext& ctx);
 void EmitGetCarryFromOp(EmitContext& ctx);
 void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
 Id EmitFPAbs16(EmitContext& ctx, Id value);
 Id EmitFPAbs32(EmitContext& ctx, Id value);
 Id EmitFPAbs64(EmitContext& ctx, Id value);
@@ -291,5 +293,33 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value);
 Id EmitConvertF32F16(EmitContext& ctx, Id value);
 Id EmitConvertF32F64(EmitContext& ctx, Id value);
 Id EmitConvertF64F32(EmitContext& ctx, Id value);
+Id EmitConvertF16S32(EmitContext& ctx, Id value);
+Id EmitConvertF16S64(EmitContext& ctx, Id value);
+Id EmitConvertF16U32(EmitContext& ctx, Id value);
+Id EmitConvertF16U64(EmitContext& ctx, Id value);
+Id EmitConvertF32S32(EmitContext& ctx, Id value);
+Id EmitConvertF32S64(EmitContext& ctx, Id value);
+Id EmitConvertF32U32(EmitContext& ctx, Id value);
+Id EmitConvertF32U64(EmitContext& ctx, Id value);
+Id EmitConvertF64S32(EmitContext& ctx, Id value);
+Id EmitConvertF64S64(EmitContext& ctx, Id value);
+Id EmitConvertF64U32(EmitContext& ctx, Id value);
+Id EmitConvertF64U64(EmitContext& ctx, Id value);
+Id EmitBindlessImageSampleImplicitLod(EmitContext&);
+Id EmitBindlessImageSampleExplicitLod(EmitContext&);
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+Id EmitBoundImageSampleImplicitLod(EmitContext&);
+Id EmitBoundImageSampleExplicitLod(EmitContext&);
+Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                              Id bias_lc, Id offset);
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                              Id lod_lc, Id offset);
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+                                  Id coords, Id dref, Id bias_lc, Id offset);
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+                                  Id coords, Id dref, Id lod_lc, Id offset);
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
index edcc2a1cca..2aff673aa5 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -102,4 +102,52 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) {
     return ctx.OpFConvert(ctx.F64[1], value);
 }
 
+Id EmitConvertF16S32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16S64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16U32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16U64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF32S32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32S64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32U32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32U64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF64S32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64S64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64U32(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64U64(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F64[1], value);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
new file mode 100644
index 0000000000..5f4783c952
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -0,0 +1,146 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <boost/container/static_vector.hpp>
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
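+// Accumulates the optional image operands (Bias, Lod, Offset, MinLod) and their mask,
+// added in the ascending bit order SPIR-V requires.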
+class ImageOperands {
+public:
+    explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
+                           Id lod, Id offset) {
+        if (has_bias) {
+            const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
+            Add(spv::ImageOperandsMask::Bias, bias);
+        }
+        if (has_lod) {
+            const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
+            Add(spv::ImageOperandsMask::Lod, lod_value);
+        }
+        if (Sirit::ValidId(offset)) {
+            Add(spv::ImageOperandsMask::Offset, offset);
+        }
+        if (has_lod_clamp) {
+            const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
+            Add(spv::ImageOperandsMask::MinLod, lod_clamp);
+        }
+    }
+
+    void Add(spv::ImageOperandsMask new_mask, Id value) {
+        mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
+                                                   static_cast<unsigned>(new_mask));
+        operands.push_back(value);
+    }
+
+    std::span<const Id> Span() const noexcept {
+        return std::span{operands.data(), operands.size()};
+    }
+
+    spv::ImageOperandsMask Mask() const noexcept {
+        return mask;
+    }
+
+private:
+    boost::container::static_vector<Id, 3> operands;
+    spv::ImageOperandsMask mask{};
+};
+
+Id Texture(EmitContext& ctx, const IR::Value& index) {
+    if (index.IsImmediate()) {
+        const TextureDefinition def{ctx.textures.at(index.U32())};
+        return ctx.OpLoad(def.type, def.id);
+    }
+    throw NotImplementedException("Indirect texture sample");
+}
+
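+// Emits the sparse or the non-sparse variant of a sample instruction. When a
+// GetSparseFromOp pseudo-instruction consumes the result, the sparse variant returns a
+// (residency code, texel) struct; the residency test is forwarded to the pseudo-op and
+// the texel is extracted as the result.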
+template <typename MethodPtrType, typename... Args>
+Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
+        Id result_type, Args&&... args) {
+    IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    if (!sparse) {
+        return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...);
+    }
+    const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
+    const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
+    const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
+    sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
+    sparse->Invalidate();
+    return ctx.OpCompositeExtract(result_type, sample, 1U);
+}
+} // Anonymous namespace
+
+Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleImplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleExplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                              Id bias_lc, Id offset) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
+                                 offset);
+    return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
+                &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index),
+                coords, operands.Mask(), operands.Span());
+}
+
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                              Id lod_lc, Id offset) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset);
+    return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+                &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index),
+                coords, operands.Mask(), operands.Span());
+}
+
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+                                  Id coords, Id dref, Id bias_lc, Id offset) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
+                                 offset);
+    return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
+                &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
+                Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
+}
+
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+                                  Id coords, Id dref, Id lod_lc, Id offset) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset);
+    return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
+                &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
+                Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
index 808c1b4016..7d3efc7418 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -154,8 +154,22 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
     ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
 }
 
-void EmitWriteStorage128(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value) {
+    if (!binding.IsImmediate()) {
+        throw NotImplementedException("Dynamic storage buffer indexing");
+    }
+    // TODO: Support reinterpreting bindings, guaranteed to be aligned
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
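+    // Store the four 32-bit components through consecutive u32 accesses; the SSBO is
+    // declared as a runtime array of u32, so there is no 128-bit pointer to store through.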
+    for (u32 element = 0; element < 4; ++element) {
+        Id index = base_index;
+        if (element > 0) {
+            index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element));
+        }
+        const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)};
+        ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element));
+    }
 }
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0ba681fb96..0fcb68050d 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -12,6 +12,8 @@ public:
 
     [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
 
+    [[nodiscard]] virtual u32 TextureBoundBuffer() = 0;
+
     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0;
 };
 
diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp
index 5127523f97..21700c72b0 100644
--- a/src/shader_recompiler/file_environment.cpp
+++ b/src/shader_recompiler/file_environment.cpp
@@ -39,6 +39,10 @@ u64 FileEnvironment::ReadInstruction(u32 offset) {
     return data[offset / 8];
 }
 
+u32 FileEnvironment::TextureBoundBuffer() {
+    throw NotImplementedException("Texture bound buffer serialization");
+}
+
 std::array<u32, 3> FileEnvironment::WorkgroupSize() {
     return {1, 1, 1};
 }
diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h
index b8c4bbadd9..62302bc8ed 100644
--- a/src/shader_recompiler/file_environment.h
+++ b/src/shader_recompiler/file_environment.h
@@ -3,7 +3,7 @@
 #include <vector>
 
 #include "common/common_types.h"
-#include "environment.h"
+#include "shader_recompiler/environment.h"
 
 namespace Shader {
 
@@ -14,6 +14,8 @@ public:
 
     u64 ReadInstruction(u32 offset) override;
 
+    u32 TextureBoundBuffer() override;
+
     std::array<u32, 3> WorkgroupSize() override;
 
 private:
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index f38b46bace..ae3354c669 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -7,11 +7,24 @@
 #include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
-
-[[noreturn]] static void ThrowInvalidType(Type type) {
+namespace {
+[[noreturn]] void ThrowInvalidType(Type type) {
     throw InvalidArgument("Invalid type {}", type);
 }
 
+Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
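+    // Bias/LOD and the LOD clamp share one operand slot on the image sample opcodes,
+    // so pack them into a composite when both are present.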
+    if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
+        return ir.CompositeConstruct(bias_lod, lod_clamp);
+    } else if (!bias_lod.IsEmpty()) {
+        return bias_lod;
+    } else if (!lod_clamp.IsEmpty()) {
+        return lod_clamp;
+    } else {
+        return Value{};
+    }
+}
+} // Anonymous namespace
+
 U1 IREmitter::Imm1(bool value) const {
     return U1{Value{value}};
 }
@@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
     return Inst<U1>(Opcode::GetOverflowFromOp, op);
 }
 
+U1 IREmitter::GetSparseFromOp(const Value& op) {
+    return Inst<U1>(Opcode::GetSparseFromOp, op);
+}
+
 F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
     if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
@@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
     }
 }
 
+F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) {
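+    // Select the opcode from the destination bit size and the source integer width.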
+    switch (bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F16>(Opcode::ConvertF16S32, value);
+        case Type::U64:
+            return Inst<F16>(Opcode::ConvertF16S64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 32:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F32>(Opcode::ConvertF32S32, value);
+        case Type::U64:
+            return Inst<F32>(Opcode::ConvertF32S64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 64:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F64>(Opcode::ConvertF64S32, value);
+        case Type::U64:
+            return Inst<F64>(Opcode::ConvertF64S64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    default:
+        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+    }
+}
+
+F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) {
+    switch (bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F16>(Opcode::ConvertF16U32, value);
+        case Type::U64:
+            return Inst<F16>(Opcode::ConvertF16U64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 32:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F32>(Opcode::ConvertF32U32, value);
+        case Type::U64:
+            return Inst<F32>(Opcode::ConvertF32U64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 64:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F64>(Opcode::ConvertF64U32, value);
+        case Type::U64:
+            return Inst<F64>(Opcode::ConvertF64U64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    default:
+        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+    }
+}
+
+F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) {
+    if (is_signed) {
+        return ConvertSToF(bitsize, value);
+    } else {
+        return ConvertUToF(bitsize, value);
+    }
+}
+
 U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
     switch (result_bitsize) {
     case 32:
@@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
+Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
+                                        const Value& offset, const F32& lod_clamp,
+                                        TextureInstInfo info) {
+    const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
+                                         : Opcode::BindlessImageSampleImplicitLod};
+    return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
+}
+
+Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
+                                        const Value& offset, const F32& lod_clamp,
+                                        TextureInstInfo info) {
+    const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)};
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
+                                         : Opcode::BindlessImageSampleExplicitLod};
+    return Inst(op, Flags{info}, handle, coords, lod_lc, offset);
+}
+
+F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
+                                          const F32& bias, const Value& offset,
+                                          const F32& lod_clamp, TextureInstInfo info) {
+    const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
+                                         : Opcode::BindlessImageSampleDrefImplicitLod};
+    return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
+}
+
+F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
+                                          const F32& lod, const Value& offset, const F32& lod_clamp,
+                                          TextureInstInfo info) {
+    const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)};
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
+                                         : Opcode::BindlessImageSampleDrefExplicitLod};
+    return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset);
+}
+
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 6e29bf0e20..cb2a7710a1 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -91,6 +91,7 @@ public:
     [[nodiscard]] U1 GetSignFromOp(const Value& op);
     [[nodiscard]] U1 GetCarryFromOp(const Value& op);
     [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
+    [[nodiscard]] U1 GetSparseFromOp(const Value& op);
 
     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
@@ -159,7 +160,7 @@ public:
     [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
                                      const U32& count);
     [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
-                                      bool is_signed);
+                                      bool is_signed = false);
     [[nodiscard]] U32 BitReverse(const U32& value);
     [[nodiscard]] U32 BitCount(const U32& value);
     [[nodiscard]] U32 BitwiseNot(const U32& value);
@@ -186,10 +187,28 @@ public:
     [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
+    [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value);
 
     [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
     [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
 
+    [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
+                                               const F32& bias, const Value& offset,
+                                               const F32& lod_clamp, TextureInstInfo info);
+    [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
+                                               const F32& lod, const Value& offset,
+                                               const F32& lod_clamp, TextureInstInfo info);
+    [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
+                                                 const F32& dref, const F32& bias,
+                                                 const Value& offset, const F32& lod_clamp,
+                                                 TextureInstInfo info);
+    [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
+                                                 const F32& dref, const F32& lod,
+                                                 const Value& offset, const F32& lod_clamp,
+                                                 TextureInstInfo info);
+
 private:
     IR::Block::iterator insertion_point;
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index d6a9be87d5..88e186f215 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -10,26 +10,27 @@
 #include "shader_recompiler/frontend/ir/type.h"
 
 namespace Shader::IR {
-
-static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
+namespace {
+void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
     if (inst && inst->Opcode() != opcode) {
         throw LogicError("Invalid pseudo-instruction");
     }
 }
 
-static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
+void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
     if (dest_inst) {
         throw LogicError("Only one of each type of pseudo-op allowed");
     }
     dest_inst = pseudo_inst;
 }
 
-static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
+void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
     if (inst->Opcode() != expected_opcode) {
         throw LogicError("Undoing use of invalid pseudo-op");
     }
     inst = nullptr;
 }
+} // Anonymous namespace
 
 Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
     if (op == Opcode::Phi) {
@@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept {
     case Opcode::GetSignFromOp:
     case Opcode::GetCarryFromOp:
     case Opcode::GetOverflowFromOp:
+    case Opcode::GetSparseFromOp:
         return true;
     default:
         return false;
@@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const {
                        [](const IR::Value& value) { return value.IsImmediate(); });
 }
 
-bool Inst::HasAssociatedPseudoOperation() const noexcept {
-    return zero_inst || sign_inst || carry_inst || overflow_inst;
-}
-
 Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
-    // This is faster than doing a search through the block.
+    if (!associated_insts) {
+        return nullptr;
+    }
     switch (opcode) {
     case Opcode::GetZeroFromOp:
-        CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp);
-        return zero_inst;
+        CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
+        return associated_insts->zero_inst;
     case Opcode::GetSignFromOp:
-        CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp);
-        return sign_inst;
+        CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
+        return associated_insts->sign_inst;
     case Opcode::GetCarryFromOp:
-        CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp);
-        return carry_inst;
+        CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
+        return associated_insts->carry_inst;
     case Opcode::GetOverflowFromOp:
-        CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp);
-        return overflow_inst;
+        CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
+        return associated_insts->overflow_inst;
+    case Opcode::GetSparseFromOp:
+        CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
+        return associated_insts->sparse_inst;
     default:
         throw InvalidArgument("{} is not a pseudo-instruction", opcode);
     }
@@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) {
     op = opcode;
 }
 
+void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
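+    // Allocate the pseudo-op bookkeeping on first use; most instructions never need it.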
+    if (!associated_insts) {
+        associated_insts = std::make_unique<AssociatedInsts>();
+    }
+}
+
 void Inst::Use(const Value& value) {
     Inst* const inst{value.Inst()};
     ++inst->use_count;
 
+    std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
     switch (op) {
     case Opcode::GetZeroFromOp:
-        SetPseudoInstruction(inst->zero_inst, this);
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->zero_inst, this);
         break;
     case Opcode::GetSignFromOp:
-        SetPseudoInstruction(inst->sign_inst, this);
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->sign_inst, this);
         break;
     case Opcode::GetCarryFromOp:
-        SetPseudoInstruction(inst->carry_inst, this);
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->carry_inst, this);
         break;
     case Opcode::GetOverflowFromOp:
-        SetPseudoInstruction(inst->overflow_inst, this);
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->overflow_inst, this);
+        break;
+    case Opcode::GetSparseFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->sparse_inst, this);
         break;
     default:
         break;
@@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) {
     Inst* const inst{value.Inst()};
     --inst->use_count;
 
+    std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
     switch (op) {
     case Opcode::GetZeroFromOp:
-        RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp);
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
         break;
     case Opcode::GetSignFromOp:
-        RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp);
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
         break;
     case Opcode::GetCarryFromOp:
-        RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp);
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
         break;
     case Opcode::GetOverflowFromOp:
-        RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp);
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
         break;
     default:
         break;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 321393dd7c..d5336c4383 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -22,7 +22,7 @@ namespace Shader::IR {
 
 class Block;
 
-constexpr size_t MAX_ARG_COUNT = 4;
+struct AssociatedInsts;
 
 class Inst : public boost::intrusive::list_base_hook<> {
 public:
@@ -50,6 +50,11 @@ public:
         return op;
     }
 
+    /// Determines if there is a pseudo-operation associated with this instruction.
+    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
+        return associated_insts != nullptr;
+    }
+
     /// Determines whether or not this instruction may have side effects.
     [[nodiscard]] bool MayHaveSideEffects() const noexcept;
 
@@ -60,8 +65,6 @@ public:
     /// Determines if all arguments of this instruction are immediates.
     [[nodiscard]] bool AreAllArgsImmediates() const;
 
-    /// Determines if there is a pseudo-operation associated with this instruction.
-    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
     /// Gets a pseudo-operation associated with this instruction
     [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
 
@@ -122,14 +125,21 @@ private:
     u32 definition{};
     union {
         NonTriviallyDummy dummy{};
-        std::array<Value, MAX_ARG_COUNT> args;
         std::vector<std::pair<Block*, Value>> phi_args;
+        std::array<Value, 5> args;
+    };
+    std::unique_ptr<AssociatedInsts> associated_insts;
+};
+static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
+
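+// Out-of-line storage for pseudo-instructions, allocated on demand so that Inst itself
+// stays small. sparse_inst aliases zero_inst, as an instruction is not expected to have
+// both a zero flag and a sparse residency pseudo-op.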
+struct AssociatedInsts {
+    union {
+        Inst* sparse_inst;
+        Inst* zero_inst{};
     };
-    Inst* zero_inst{};
     Inst* sign_inst{};
     Inst* carry_inst{};
     Inst* overflow_inst{};
 };
-static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size");
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 44652eae7c..ad07700aef 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -4,7 +4,9 @@
 
 #pragma once
 
+#include "common/bit_field.h"
 #include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
 
 namespace Shader::IR {
 
@@ -30,4 +32,12 @@ struct FpControl {
 };
 static_assert(sizeof(FpControl) <= sizeof(u32));
 
+union TextureInstInfo {
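+    // Packed into the 32-bit flags word of image sample instructions.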
+    u32 raw;
+    BitField<0, 8, TextureType> type;
+    BitField<8, 1, u32> has_bias;
+    BitField<16, 1, u32> has_lod_clamp;
+};
+static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
+
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
index 1f188411a9..8492a13d54 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.cpp
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -14,7 +14,7 @@ namespace {
 struct OpcodeMeta {
     std::string_view name;
     Type type;
-    std::array<Type, 4> arg_types;
+    std::array<Type, 5> arg_types;
 };
 
 using enum Type;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index c4e72c84d9..aa011fab1b 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -2,301 +2,330 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-//     opcode name,                                         return type,    arg1 type,      arg2 type,      arg3 type,      arg4 type,      ...
-OPCODE(Phi,                                                 Opaque,                                                                         )
-OPCODE(Identity,                                            Opaque,         Opaque,                                                         )
-OPCODE(Void,                                                Void,                                                                           )
+//     opcode name,                                         return type,    arg1 type,      arg2 type,      arg3 type,      arg4 type,      arg5 type,      ...
+OPCODE(Phi,                                                 Opaque,                                                                                         )
+OPCODE(Identity,                                            Opaque,         Opaque,                                                                         )
+OPCODE(Void,                                                Void,                                                                                           )
 
 // Control flow
-OPCODE(Branch,                                              Void,           Label,                                                          )
-OPCODE(BranchConditional,                                   Void,           U1,             Label,          Label,                          )
-OPCODE(LoopMerge,                                           Void,           Label,          Label,                                          )
-OPCODE(SelectionMerge,                                      Void,           Label,                                                          )
-OPCODE(Return,                                              Void,                                                                           )
+OPCODE(Branch,                                              Void,           Label,                                                                          )
+OPCODE(BranchConditional,                                   Void,           U1,             Label,          Label,                                          )
+OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          )
+OPCODE(SelectionMerge,                                      Void,           Label,                                                                          )
+OPCODE(Return,                                              Void,                                                                                           )
 
 // Context getters/setters
-OPCODE(GetRegister,                                         U32,            Reg,                                                            )
-OPCODE(SetRegister,                                         Void,           Reg,            U32,                                            )
-OPCODE(GetPred,                                             U1,             Pred,                                                           )
-OPCODE(SetPred,                                             Void,           Pred,           U1,                                             )
-OPCODE(GetGotoVariable,                                     U1,             U32,                                                            )
-OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                             )
-OPCODE(GetCbuf,                                             U32,            U32,            U32,                                            )
-OPCODE(GetAttribute,                                        U32,            Attribute,                                                      )
-OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                            )
-OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                            )
-OPCODE(SetAttributeIndexed,                                 Void,           U32,            U32,                                            )
-OPCODE(GetZFlag,                                            U1,             Void,                                                           )
-OPCODE(GetSFlag,                                            U1,             Void,                                                           )
-OPCODE(GetCFlag,                                            U1,             Void,                                                           )
-OPCODE(GetOFlag,                                            U1,             Void,                                                           )
-OPCODE(SetZFlag,                                            Void,           U1,                                                             )
-OPCODE(SetSFlag,                                            Void,           U1,                                                             )
-OPCODE(SetCFlag,                                            Void,           U1,                                                             )
-OPCODE(SetOFlag,                                            Void,           U1,                                                             )
-OPCODE(WorkgroupId,                                         U32x3,                                                                          )
-OPCODE(LocalInvocationId,                                   U32x3,                                                                          )
+OPCODE(GetRegister,                                         U32,            Reg,                                                                            )
+OPCODE(SetRegister,                                         Void,           Reg,            U32,                                                            )
+OPCODE(GetPred,                                             U1,             Pred,                                                                           )
+OPCODE(SetPred,                                             Void,           Pred,           U1,                                                             )
+OPCODE(GetGotoVariable,                                     U1,             U32,                                                                            )
+OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                                             )
+OPCODE(GetCbuf,                                             U32,            U32,            U32,                                                            )
+OPCODE(GetAttribute,                                        U32,            Attribute,                                                                      )
+OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                                            )
+OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                                            )
+OPCODE(SetAttributeIndexed,                                 Void,           U32,            U32,                                                            )
+OPCODE(GetZFlag,                                            U1,             Void,                                                                           )
+OPCODE(GetSFlag,                                            U1,             Void,                                                                           )
+OPCODE(GetCFlag,                                            U1,             Void,                                                                           )
+OPCODE(GetOFlag,                                            U1,             Void,                                                                           )
+OPCODE(SetZFlag,                                            Void,           U1,                                                                             )
+OPCODE(SetSFlag,                                            Void,           U1,                                                                             )
+OPCODE(SetCFlag,                                            Void,           U1,                                                                             )
+OPCODE(SetOFlag,                                            Void,           U1,                                                                             )
+OPCODE(WorkgroupId,                                         U32x3,                                                                                          )
+OPCODE(LocalInvocationId,                                   U32x3,                                                                                          )
 
 // Undefined
-OPCODE(UndefU1,                                             U1,                                                                             )
-OPCODE(UndefU8,                                             U8,                                                                             )
-OPCODE(UndefU16,                                            U16,                                                                            )
-OPCODE(UndefU32,                                            U32,                                                                            )
-OPCODE(UndefU64,                                            U64,                                                                            )
+OPCODE(UndefU1,                                             U1,                                                                                             )
+OPCODE(UndefU8,                                             U8,                                                                                             )
+OPCODE(UndefU16,                                            U16,                                                                                            )
+OPCODE(UndefU32,                                            U32,                                                                                            )
+OPCODE(UndefU64,                                            U64,                                                                                            )
 
 // Memory operations
-OPCODE(LoadGlobalU8,                                        U32,            U64,                                                            )
-OPCODE(LoadGlobalS8,                                        U32,            U64,                                                            )
-OPCODE(LoadGlobalU16,                                       U32,            U64,                                                            )
-OPCODE(LoadGlobalS16,                                       U32,            U64,                                                            )
-OPCODE(LoadGlobal32,                                        U32,            U64,                                                            )
-OPCODE(LoadGlobal64,                                        U32x2,          U64,                                                            )
-OPCODE(LoadGlobal128,                                       U32x4,          U64,                                                            )
-OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                            )
-OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                            )
-OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                            )
-OPCODE(WriteGlobalS16,                                      Void,           U64,            U32,                                            )
-OPCODE(WriteGlobal32,                                       Void,           U64,            U32,                                            )
-OPCODE(WriteGlobal64,                                       Void,           U64,            U32x2,                                          )
-OPCODE(WriteGlobal128,                                      Void,           U64,            U32x4,                                          )
+OPCODE(LoadGlobalU8,                                        U32,            U64,                                                                            )
+OPCODE(LoadGlobalS8,                                        U32,            U64,                                                                            )
+OPCODE(LoadGlobalU16,                                       U32,            U64,                                                                            )
+OPCODE(LoadGlobalS16,                                       U32,            U64,                                                                            )
+OPCODE(LoadGlobal32,                                        U32,            U64,                                                                            )
+OPCODE(LoadGlobal64,                                        U32x2,          U64,                                                                            )
+OPCODE(LoadGlobal128,                                       U32x4,          U64,                                                                            )
+OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobalS16,                                      Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobal32,                                       Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobal64,                                       Void,           U64,            U32x2,                                                          )
+OPCODE(WriteGlobal128,                                      Void,           U64,            U32x4,                                                          )
 
 // Storage buffer operations
-OPCODE(LoadStorageU8,                                       U32,            U32,            U32,                                            )
-OPCODE(LoadStorageS8,                                       U32,            U32,            U32,                                            )
-OPCODE(LoadStorageU16,                                      U32,            U32,            U32,                                            )
-OPCODE(LoadStorageS16,                                      U32,            U32,            U32,                                            )
-OPCODE(LoadStorage32,                                       U32,            U32,            U32,                                            )
-OPCODE(LoadStorage64,                                       U32x2,          U32,            U32,                                            )
-OPCODE(LoadStorage128,                                      U32x4,          U32,            U32,                                            )
-OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorage64,                                      Void,           U32,            U32,            U32x2,                          )
-OPCODE(WriteStorage128,                                     Void,           U32,            U32,            U32x4,                          )
+OPCODE(LoadStorageU8,                                       U32,            U32,            U32,                                                            )
+OPCODE(LoadStorageS8,                                       U32,            U32,            U32,                                                            )
+OPCODE(LoadStorageU16,                                      U32,            U32,            U32,                                                            )
+OPCODE(LoadStorageS16,                                      U32,            U32,            U32,                                                            )
+OPCODE(LoadStorage32,                                       U32,            U32,            U32,                                                            )
+OPCODE(LoadStorage64,                                       U32x2,          U32,            U32,                                                            )
+OPCODE(LoadStorage128,                                      U32x4,          U32,            U32,                                                            )
+OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorage64,                                      Void,           U32,            U32,            U32x2,                                          )
+OPCODE(WriteStorage128,                                     Void,           U32,            U32,            U32x4,                                          )
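
Each storage-buffer opcode carries two U32 operands ahead of the value, presumably a buffer identifier and a byte offset (an assumption; the table does not name its operands). A standalone sketch of that reading:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Illustrative model. Assumption: the first operand selects a storage
    // buffer and the second is a byte offset into it.
    struct StorageBuffers {
        std::vector<std::vector<uint8_t>> buffers;

        uint32_t LoadStorage32(uint32_t binding, uint32_t offset) const {
            uint32_t value{};
            std::memcpy(&value, buffers[binding].data() + offset, sizeof(value));
            return value;
        }

        void WriteStorage32(uint32_t binding, uint32_t offset, uint32_t value) {
            std::memcpy(buffers[binding].data() + offset, &value, sizeof(value));
        }
    };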
 
 // Vector utility
-OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                            )
-OPCODE(CompositeConstructU32x3,                             U32x3,          U32,            U32,            U32,                            )
-OPCODE(CompositeConstructU32x4,                             U32x4,          U32,            U32,            U32,            U32,            )
-OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                            )
-OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                            )
-OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                            )
-OPCODE(CompositeInsertU32x2,                                U32x2,          U32x2,          U32,            U32,                            )
-OPCODE(CompositeInsertU32x3,                                U32x3,          U32x3,          U32,            U32,                            )
-OPCODE(CompositeInsertU32x4,                                U32x4,          U32x4,          U32,            U32,                            )
-OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                            )
-OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                            )
-OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,            )
-OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                            )
-OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                            )
-OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                            )
-OPCODE(CompositeInsertF16x2,                                F16x2,          F16x2,          F16,            U32,                            )
-OPCODE(CompositeInsertF16x3,                                F16x3,          F16x3,          F16,            U32,                            )
-OPCODE(CompositeInsertF16x4,                                F16x4,          F16x4,          F16,            U32,                            )
-OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                            )
-OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                            )
-OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,            )
-OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                            )
-OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                            )
-OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                            )
-OPCODE(CompositeInsertF32x2,                                F32x2,          F32x2,          F32,            U32,                            )
-OPCODE(CompositeInsertF32x3,                                F32x3,          F32x3,          F32,            U32,                            )
-OPCODE(CompositeInsertF32x4,                                F32x4,          F32x4,          F32,            U32,                            )
-OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                            )
-OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                            )
-OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,            )
-OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                            )
-OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                            )
-OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                            )
-OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                            )
-OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                            )
-OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                            )
+OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                                            )
+OPCODE(CompositeConstructU32x3,                             U32x3,          U32,            U32,            U32,                                            )
+OPCODE(CompositeConstructU32x4,                             U32x4,          U32,            U32,            U32,            U32,                            )
+OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                                            )
+OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                                            )
+OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                                            )
+OPCODE(CompositeInsertU32x2,                                U32x2,          U32x2,          U32,            U32,                                            )
+OPCODE(CompositeInsertU32x3,                                U32x3,          U32x3,          U32,            U32,                                            )
+OPCODE(CompositeInsertU32x4,                                U32x4,          U32x4,          U32,            U32,                                            )
+OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                                            )
+OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                                            )
+OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,                            )
+OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                                            )
+OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                                            )
+OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                                            )
+OPCODE(CompositeInsertF16x2,                                F16x2,          F16x2,          F16,            U32,                                            )
+OPCODE(CompositeInsertF16x3,                                F16x3,          F16x3,          F16,            U32,                                            )
+OPCODE(CompositeInsertF16x4,                                F16x4,          F16x4,          F16,            U32,                                            )
+OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                                            )
+OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                                            )
+OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,                            )
+OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                                            )
+OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                                            )
+OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                                            )
+OPCODE(CompositeInsertF32x2,                                F32x2,          F32x2,          F32,            U32,                                            )
+OPCODE(CompositeInsertF32x3,                                F32x3,          F32x3,          F32,            U32,                                            )
+OPCODE(CompositeInsertF32x4,                                F32x4,          F32x4,          F32,            U32,                                            )
+OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                                            )
+OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                                            )
+OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,                            )
+OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                                            )
+OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                                            )
+OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                                            )
+OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                                            )
+OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                                            )
+OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                                            )
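
The composite opcodes follow one pattern per scalar type: Construct builds a 2-4 lane vector from scalars, Extract takes a trailing U32 lane index and returns a scalar, and Insert returns a whole new vector rather than mutating its input, since SSA values are immutable. A sketch of the U32x4 case:

    #include <array>
    #include <cstdint>

    using U32x4 = std::array<uint32_t, 4>;

    uint32_t CompositeExtractU32x4(const U32x4& vec, uint32_t index) {
        return vec[index];
    }

    // SSA-style: the result is a fresh vector with one lane replaced; the
    // original value is left untouched.
    U32x4 CompositeInsertU32x4(U32x4 vec, uint32_t value, uint32_t index) {
        vec[index] = value; // modifies the local copy only
        return vec;
    }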
 
 // Select operations
-OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                             )
-OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                             )
-OPCODE(SelectU16,                                           U16,            U1,             U16,            U16,                            )
-OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                            )
-OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                            )
-OPCODE(SelectF16,                                           F16,            U1,             F16,            F16,                            )
-OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                            )
+OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                                             )
+OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                                             )
+OPCODE(SelectU16,                                           U16,            U1,             U16,            U16,                                            )
+OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                                            )
+OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                                            )
+OPCODE(SelectF16,                                           F16,            U1,             F16,            F16,                                            )
+OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                                            )
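
Every Select variant has the same shape: a U1 condition followed by two operands of the result type; semantically a ternary. One assumption worth flagging: the sketch below takes the first value operand as the "true" case, which the table itself does not state.

    #include <cstdint>

    // Illustrative model of SelectU32(cond, true_value, false_value).
    uint32_t SelectU32(bool condition, uint32_t true_value, uint32_t false_value) {
        return condition ? true_value : false_value;
    }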
 
 // Bitwise conversions
-OPCODE(BitCastU16F16,                                       U16,            F16,                                                            )
-OPCODE(BitCastU32F32,                                       U32,            F32,                                                            )
-OPCODE(BitCastU64F64,                                       U64,            F64,                                                            )
-OPCODE(BitCastF16U16,                                       F16,            U16,                                                            )
-OPCODE(BitCastF32U32,                                       F32,            U32,                                                            )
-OPCODE(BitCastF64U64,                                       F64,            U64,                                                            )
-OPCODE(PackUint2x32,                                        U64,            U32x2,                                                          )
-OPCODE(UnpackUint2x32,                                      U32x2,          U64,                                                            )
-OPCODE(PackFloat2x16,                                       U32,            F16x2,                                                          )
-OPCODE(UnpackFloat2x16,                                     F16x2,          U32,                                                            )
-OPCODE(PackHalf2x16,                                        U32,            F32x2,                                                          )
-OPCODE(UnpackHalf2x16,                                      F32x2,          U32,                                                            )
-OPCODE(PackDouble2x32,                                      F64,            U32x2,                                                          )
-OPCODE(UnpackDouble2x32,                                    U32x2,          F64,                                                            )
+OPCODE(BitCastU16F16,                                       U16,            F16,                                                                            )
+OPCODE(BitCastU32F32,                                       U32,            F32,                                                                            )
+OPCODE(BitCastU64F64,                                       U64,            F64,                                                                            )
+OPCODE(BitCastF16U16,                                       F16,            U16,                                                                            )
+OPCODE(BitCastF32U32,                                       F32,            U32,                                                                            )
+OPCODE(BitCastF64U64,                                       F64,            U64,                                                                            )
+OPCODE(PackUint2x32,                                        U64,            U32x2,                                                                          )
+OPCODE(UnpackUint2x32,                                      U32x2,          U64,                                                                            )
+OPCODE(PackFloat2x16,                                       U32,            F16x2,                                                                          )
+OPCODE(UnpackFloat2x16,                                     F16x2,          U32,                                                                            )
+OPCODE(PackHalf2x16,                                        U32,            F32x2,                                                                          )
+OPCODE(UnpackHalf2x16,                                      F32x2,          U32,                                                                            )
+OPCODE(PackDouble2x32,                                      F64,            U32x2,                                                                          )
+OPCODE(UnpackDouble2x32,                                    U32x2,          F64,                                                                            )
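
BitCast* reinterprets bits between an integer and a float of the same width without any numeric conversion, and each Pack/Unpack pair is a lossless inverse between a wide scalar and a two-lane vector. A sketch, assuming the GLSL packUint2x32 convention of the first lane landing in the least-significant half:

    #include <cstdint>
    #include <cstring>

    // BitCastU32F32: same 32 bits, different type; no rounding or conversion.
    uint32_t BitCastU32F32(float value) {
        uint32_t bits{};
        std::memcpy(&bits, &value, sizeof(bits));
        return bits;
    }

    // PackUint2x32 and UnpackUint2x32 are exact inverses.
    uint64_t PackUint2x32(uint32_t lo, uint32_t hi) {
        return static_cast<uint64_t>(lo) | (static_cast<uint64_t>(hi) << 32);
    }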
 
 // Pseudo-operations, handled specially at final emit
-OPCODE(GetZeroFromOp,                                       U1,             Opaque,                                                         )
-OPCODE(GetSignFromOp,                                       U1,             Opaque,                                                         )
-OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                         )
-OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                         )
+OPCODE(GetZeroFromOp,                                       U1,             Opaque,                                                                         )
+OPCODE(GetSignFromOp,                                       U1,             Opaque,                                                                         )
+OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                                         )
+OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                                         )
+OPCODE(GetSparseFromOp,                                     U1,             Opaque,                                                                         )
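
Each pseudo-op consumes an Opaque operand (the instruction whose side result it observes) and produces a U1. GetSparseFromOp is the new entry in this hunk, presumably reporting sparse-residency from texture operations. As a rough model, think of the flags as extra results that ride along with a real instruction:

    #include <cstdint>

    struct AddResult {
        uint32_t value;
        bool zero;  // what GetZeroFromOp would observe
        bool carry; // what GetCarryFromOp would observe
    };

    // Illustrative only: the pseudo-ops compute nothing themselves; they expose
    // side results of the instruction they are attached to.
    AddResult IAdd32WithFlags(uint32_t a, uint32_t b) {
        const uint32_t sum = a + b;               // unsigned wraparound is defined
        return AddResult{sum, sum == 0, sum < a}; // carry iff the add wrapped
    }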
 
 // Floating-point operations
-OPCODE(FPAbs16,                                             F16,            F16,                                                            )
-OPCODE(FPAbs32,                                             F32,            F32,                                                            )
-OPCODE(FPAbs64,                                             F64,            F64,                                                            )
-OPCODE(FPAdd16,                                             F16,            F16,            F16,                                            )
-OPCODE(FPAdd32,                                             F32,            F32,            F32,                                            )
-OPCODE(FPAdd64,                                             F64,            F64,            F64,                                            )
-OPCODE(FPFma16,                                             F16,            F16,            F16,            F16,                            )
-OPCODE(FPFma32,                                             F32,            F32,            F32,            F32,                            )
-OPCODE(FPFma64,                                             F64,            F64,            F64,            F64,                            )
-OPCODE(FPMax32,                                             F32,            F32,            F32,                                            )
-OPCODE(FPMax64,                                             F64,            F64,            F64,                                            )
-OPCODE(FPMin32,                                             F32,            F32,            F32,                                            )
-OPCODE(FPMin64,                                             F64,            F64,            F64,                                            )
-OPCODE(FPMul16,                                             F16,            F16,            F16,                                            )
-OPCODE(FPMul32,                                             F32,            F32,            F32,                                            )
-OPCODE(FPMul64,                                             F64,            F64,            F64,                                            )
-OPCODE(FPNeg16,                                             F16,            F16,                                                            )
-OPCODE(FPNeg32,                                             F32,            F32,                                                            )
-OPCODE(FPNeg64,                                             F64,            F64,                                                            )
-OPCODE(FPRecip32,                                           F32,            F32,                                                            )
-OPCODE(FPRecip64,                                           F64,            F64,                                                            )
-OPCODE(FPRecipSqrt32,                                       F32,            F32,                                                            )
-OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                            )
-OPCODE(FPSqrt,                                              F32,            F32,                                                            )
-OPCODE(FPSin,                                               F32,            F32,                                                            )
-OPCODE(FPExp2,                                              F32,            F32,                                                            )
-OPCODE(FPCos,                                               F32,            F32,                                                            )
-OPCODE(FPLog2,                                              F32,            F32,                                                            )
-OPCODE(FPSaturate16,                                        F16,            F16,                                                            )
-OPCODE(FPSaturate32,                                        F32,            F32,                                                            )
-OPCODE(FPSaturate64,                                        F64,            F64,                                                            )
-OPCODE(FPRoundEven16,                                       F16,            F16,                                                            )
-OPCODE(FPRoundEven32,                                       F32,            F32,                                                            )
-OPCODE(FPRoundEven64,                                       F64,            F64,                                                            )
-OPCODE(FPFloor16,                                           F16,            F16,                                                            )
-OPCODE(FPFloor32,                                           F32,            F32,                                                            )
-OPCODE(FPFloor64,                                           F64,            F64,                                                            )
-OPCODE(FPCeil16,                                            F16,            F16,                                                            )
-OPCODE(FPCeil32,                                            F32,            F32,                                                            )
-OPCODE(FPCeil64,                                            F64,            F64,                                                            )
-OPCODE(FPTrunc16,                                           F16,            F16,                                                            )
-OPCODE(FPTrunc32,                                           F32,            F32,                                                            )
-OPCODE(FPTrunc64,                                           F64,            F64,                                                            )
+OPCODE(FPAbs16,                                             F16,            F16,                                                                            )
+OPCODE(FPAbs32,                                             F32,            F32,                                                                            )
+OPCODE(FPAbs64,                                             F64,            F64,                                                                            )
+OPCODE(FPAdd16,                                             F16,            F16,            F16,                                                            )
+OPCODE(FPAdd32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPAdd64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPFma16,                                             F16,            F16,            F16,            F16,                                            )
+OPCODE(FPFma32,                                             F32,            F32,            F32,            F32,                                            )
+OPCODE(FPFma64,                                             F64,            F64,            F64,            F64,                                            )
+OPCODE(FPMax32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPMax64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPMin32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPMin64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPMul16,                                             F16,            F16,            F16,                                                            )
+OPCODE(FPMul32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPMul64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPNeg16,                                             F16,            F16,                                                                            )
+OPCODE(FPNeg32,                                             F32,            F32,                                                                            )
+OPCODE(FPNeg64,                                             F64,            F64,                                                                            )
+OPCODE(FPRecip32,                                           F32,            F32,                                                                            )
+OPCODE(FPRecip64,                                           F64,            F64,                                                                            )
+OPCODE(FPRecipSqrt32,                                       F32,            F32,                                                                            )
+OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                                            )
+OPCODE(FPSqrt,                                              F32,            F32,                                                                            )
+OPCODE(FPSin,                                               F32,            F32,                                                                            )
+OPCODE(FPExp2,                                              F32,            F32,                                                                            )
+OPCODE(FPCos,                                               F32,            F32,                                                                            )
+OPCODE(FPLog2,                                              F32,            F32,                                                                            )
+OPCODE(FPSaturate16,                                        F16,            F16,                                                                            )
+OPCODE(FPSaturate32,                                        F32,            F32,                                                                            )
+OPCODE(FPSaturate64,                                        F64,            F64,                                                                            )
+OPCODE(FPRoundEven16,                                       F16,            F16,                                                                            )
+OPCODE(FPRoundEven32,                                       F32,            F32,                                                                            )
+OPCODE(FPRoundEven64,                                       F64,            F64,                                                                            )
+OPCODE(FPFloor16,                                           F16,            F16,                                                                            )
+OPCODE(FPFloor32,                                           F32,            F32,                                                                            )
+OPCODE(FPFloor64,                                           F64,            F64,                                                                            )
+OPCODE(FPCeil16,                                            F16,            F16,                                                                            )
+OPCODE(FPCeil32,                                            F32,            F32,                                                                            )
+OPCODE(FPCeil64,                                            F64,            F64,                                                                            )
+OPCODE(FPTrunc16,                                           F16,            F16,                                                                            )
+OPCODE(FPTrunc32,                                           F32,            F32,                                                                            )
+OPCODE(FPTrunc64,                                           F64,            F64,                                                                            )
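
Two entries in the group above deserve a note. FPFma is a fused multiply-add, a * b + c with a single rounding step, which std::fma models directly. FPSaturate clamps to [0, 1]; the NaN-to-0 behavior in the sketch matches common GPU saturate semantics but is an assumption here.

    #include <cmath>

    // Fused multiply-add: one rounding step for a * b + c.
    float FPFma32(float a, float b, float c) {
        return std::fma(a, b, c);
    }

    // Clamp to [0, 1]. std::fmax(NaN, 0.0f) yields 0, so NaN saturates to 0.
    float FPSaturate32(float value) {
        return std::fmin(std::fmax(value, 0.0f), 1.0f);
    }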
 
-OPCODE(FPOrdEqual16,                                        U1,             F16,            F16,                                            )
-OPCODE(FPOrdEqual32,                                        U1,             F32,            F32,                                            )
-OPCODE(FPOrdEqual64,                                        U1,             F64,            F64,                                            )
-OPCODE(FPUnordEqual16,                                      U1,             F16,            F16,                                            )
-OPCODE(FPUnordEqual32,                                      U1,             F32,            F32,                                            )
-OPCODE(FPUnordEqual64,                                      U1,             F64,            F64,                                            )
-OPCODE(FPOrdNotEqual16,                                     U1,             F16,            F16,                                            )
-OPCODE(FPOrdNotEqual32,                                     U1,             F32,            F32,                                            )
-OPCODE(FPOrdNotEqual64,                                     U1,             F64,            F64,                                            )
-OPCODE(FPUnordNotEqual16,                                   U1,             F16,            F16,                                            )
-OPCODE(FPUnordNotEqual32,                                   U1,             F32,            F32,                                            )
-OPCODE(FPUnordNotEqual64,                                   U1,             F64,            F64,                                            )
-OPCODE(FPOrdLessThan16,                                     U1,             F16,            F16,                                            )
-OPCODE(FPOrdLessThan32,                                     U1,             F32,            F32,                                            )
-OPCODE(FPOrdLessThan64,                                     U1,             F64,            F64,                                            )
-OPCODE(FPUnordLessThan16,                                   U1,             F16,            F16,                                            )
-OPCODE(FPUnordLessThan32,                                   U1,             F32,            F32,                                            )
-OPCODE(FPUnordLessThan64,                                   U1,             F64,            F64,                                            )
-OPCODE(FPOrdGreaterThan16,                                  U1,             F16,            F16,                                            )
-OPCODE(FPOrdGreaterThan32,                                  U1,             F32,            F32,                                            )
-OPCODE(FPOrdGreaterThan64,                                  U1,             F64,            F64,                                            )
-OPCODE(FPUnordGreaterThan16,                                U1,             F16,            F16,                                            )
-OPCODE(FPUnordGreaterThan32,                                U1,             F32,            F32,                                            )
-OPCODE(FPUnordGreaterThan64,                                U1,             F64,            F64,                                            )
-OPCODE(FPOrdLessThanEqual16,                                U1,             F16,            F16,                                            )
-OPCODE(FPOrdLessThanEqual32,                                U1,             F32,            F32,                                            )
-OPCODE(FPOrdLessThanEqual64,                                U1,             F64,            F64,                                            )
-OPCODE(FPUnordLessThanEqual16,                              U1,             F16,            F16,                                            )
-OPCODE(FPUnordLessThanEqual32,                              U1,             F32,            F32,                                            )
-OPCODE(FPUnordLessThanEqual64,                              U1,             F64,            F64,                                            )
-OPCODE(FPOrdGreaterThanEqual16,                             U1,             F16,            F16,                                            )
-OPCODE(FPOrdGreaterThanEqual32,                             U1,             F32,            F32,                                            )
-OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,            F64,                                            )
-OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                            )
-OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                            )
-OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                            )
+OPCODE(FPOrdEqual16,                                        U1,             F16,            F16,                                                            )
+OPCODE(FPOrdEqual32,                                        U1,             F32,            F32,                                                            )
+OPCODE(FPOrdEqual64,                                        U1,             F64,            F64,                                                            )
+OPCODE(FPUnordEqual16,                                      U1,             F16,            F16,                                                            )
+OPCODE(FPUnordEqual32,                                      U1,             F32,            F32,                                                            )
+OPCODE(FPUnordEqual64,                                      U1,             F64,            F64,                                                            )
+OPCODE(FPOrdNotEqual16,                                     U1,             F16,            F16,                                                            )
+OPCODE(FPOrdNotEqual32,                                     U1,             F32,            F32,                                                            )
+OPCODE(FPOrdNotEqual64,                                     U1,             F64,            F64,                                                            )
+OPCODE(FPUnordNotEqual16,                                   U1,             F16,            F16,                                                            )
+OPCODE(FPUnordNotEqual32,                                   U1,             F32,            F32,                                                            )
+OPCODE(FPUnordNotEqual64,                                   U1,             F64,            F64,                                                            )
+OPCODE(FPOrdLessThan16,                                     U1,             F16,            F16,                                                            )
+OPCODE(FPOrdLessThan32,                                     U1,             F32,            F32,                                                            )
+OPCODE(FPOrdLessThan64,                                     U1,             F64,            F64,                                                            )
+OPCODE(FPUnordLessThan16,                                   U1,             F16,            F16,                                                            )
+OPCODE(FPUnordLessThan32,                                   U1,             F32,            F32,                                                            )
+OPCODE(FPUnordLessThan64,                                   U1,             F64,            F64,                                                            )
+OPCODE(FPOrdGreaterThan16,                                  U1,             F16,            F16,                                                            )
+OPCODE(FPOrdGreaterThan32,                                  U1,             F32,            F32,                                                            )
+OPCODE(FPOrdGreaterThan64,                                  U1,             F64,            F64,                                                            )
+OPCODE(FPUnordGreaterThan16,                                U1,             F16,            F16,                                                            )
+OPCODE(FPUnordGreaterThan32,                                U1,             F32,            F32,                                                            )
+OPCODE(FPUnordGreaterThan64,                                U1,             F64,            F64,                                                            )
+OPCODE(FPOrdLessThanEqual16,                                U1,             F16,            F16,                                                            )
+OPCODE(FPOrdLessThanEqual32,                                U1,             F32,            F32,                                                            )
+OPCODE(FPOrdLessThanEqual64,                                U1,             F64,            F64,                                                            )
+OPCODE(FPUnordLessThanEqual16,                              U1,             F16,            F16,                                                            )
+OPCODE(FPUnordLessThanEqual32,                              U1,             F32,            F32,                                                            )
+OPCODE(FPUnordLessThanEqual64,                              U1,             F64,            F64,                                                            )
+OPCODE(FPOrdGreaterThanEqual16,                             U1,             F16,            F16,                                                            )
+OPCODE(FPOrdGreaterThanEqual32,                             U1,             F32,            F32,                                                            )
+OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,            F64,                                                            )
+OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                                            )
+OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                                            )
+OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                                            )
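
The Ord/Unord split mirrors SPIR-V and IEEE-754 semantics: an ordered comparison is false whenever either operand is NaN, while the unordered form is true in that case. In code:

    #include <cmath>

    // Ordered: NaN on either side makes the comparison false.
    bool FPOrdEqual32(float lhs, float rhs) {
        return !std::isnan(lhs) && !std::isnan(rhs) && lhs == rhs;
    }

    // Unordered: NaN on either side makes the comparison true.
    bool FPUnordEqual32(float lhs, float rhs) {
        return std::isnan(lhs) || std::isnan(rhs) || lhs == rhs;
    }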
 
 // Integer operations
-OPCODE(IAdd32,                                              U32,            U32,            U32,                                            )
-OPCODE(IAdd64,                                              U64,            U64,            U64,                                            )
-OPCODE(ISub32,                                              U32,            U32,            U32,                                            )
-OPCODE(ISub64,                                              U64,            U64,            U64,                                            )
-OPCODE(IMul32,                                              U32,            U32,            U32,                                            )
-OPCODE(INeg32,                                              U32,            U32,                                                            )
-OPCODE(INeg64,                                              U64,            U64,                                                            )
-OPCODE(IAbs32,                                              U32,            U32,                                                            )
-OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                            )
-OPCODE(ShiftLeftLogical64,                                  U64,            U64,            U32,                                            )
-OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                            )
-OPCODE(ShiftRightLogical64,                                 U64,            U64,            U32,                                            )
-OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                            )
-OPCODE(ShiftRightArithmetic64,                              U64,            U64,            U32,                                            )
-OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                            )
-OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                            )
-OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                            )
-OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,            )
-OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                            )
-OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                            )
-OPCODE(BitReverse32,                                        U32,            U32,                                                            )
-OPCODE(BitCount32,                                          U32,            U32,                                                            )
-OPCODE(BitwiseNot32,                                        U32,            U32,                                                            )
+OPCODE(IAdd32,                                              U32,            U32,            U32,                                                            )
+OPCODE(IAdd64,                                              U64,            U64,            U64,                                                            )
+OPCODE(ISub32,                                              U32,            U32,            U32,                                                            )
+OPCODE(ISub64,                                              U64,            U64,            U64,                                                            )
+OPCODE(IMul32,                                              U32,            U32,            U32,                                                            )
+OPCODE(INeg32,                                              U32,            U32,                                                                            )
+OPCODE(INeg64,                                              U64,            U64,                                                                            )
+OPCODE(IAbs32,                                              U32,            U32,                                                                            )
+OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                                            )
+OPCODE(ShiftLeftLogical64,                                  U64,            U64,            U32,                                                            )
+OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                                            )
+OPCODE(ShiftRightLogical64,                                 U64,            U64,            U32,                                                            )
+OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                                            )
+OPCODE(ShiftRightArithmetic64,                              U64,            U64,            U32,                                                            )
+OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                                            )
+OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                                            )
+OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                                            )
+OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,                            )
+OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                                            )
+OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                                            )
+OPCODE(BitReverse32,                                        U32,            U32,                                                                            )
+OPCODE(BitCount32,                                          U32,            U32,                                                                            )
+OPCODE(BitwiseNot32,                                        U32,            U32,                                                                            )
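
Judging by the operand counts, the bit-field opcodes take (base, offset, count) for the extracts and (base, insert, offset, count) for the insert. A sketch of the unsigned extract and the insert, assuming offset < 32 and offset + count <= 32:

    #include <cstdint>

    // Extract `count` bits of `base` starting at `offset`, zero-extended.
    uint32_t BitFieldUExtract(uint32_t base, uint32_t offset, uint32_t count) {
        if (count == 0) {
            return 0;
        }
        const uint32_t mask = count < 32 ? (1u << count) - 1u : ~0u;
        return (base >> offset) & mask;
    }

    // Replace `count` bits of `base` at `offset` with the low bits of `insert`.
    uint32_t BitFieldInsert(uint32_t base, uint32_t insert, uint32_t offset,
                            uint32_t count) {
        const uint32_t mask = (count < 32 ? (1u << count) - 1u : ~0u) << offset;
        return (base & ~mask) | ((insert << offset) & mask);
    }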
 
-OPCODE(FindSMsb32,                                          U32,            U32,                                                            )
-OPCODE(FindUMsb32,                                          U32,            U32,                                                            )
-OPCODE(SMin32,                                              U32,            U32,            U32,                                            )
-OPCODE(UMin32,                                              U32,            U32,            U32,                                            )
-OPCODE(SMax32,                                              U32,            U32,            U32,                                            )
-OPCODE(UMax32,                                              U32,            U32,            U32,                                            )
-OPCODE(SLessThan,                                           U1,             U32,            U32,                                            )
-OPCODE(ULessThan,                                           U1,             U32,            U32,                                            )
-OPCODE(IEqual,                                              U1,             U32,            U32,                                            )
-OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                            )
-OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                            )
-OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                            )
-OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                            )
-OPCODE(INotEqual,                                           U1,             U32,            U32,                                            )
-OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                            )
-OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                            )
+OPCODE(FindSMsb32,                                          U32,            U32,                                                                            )
+OPCODE(FindUMsb32,                                          U32,            U32,                                                                            )
+OPCODE(SMin32,                                              U32,            U32,            U32,                                                            )
+OPCODE(UMin32,                                              U32,            U32,            U32,                                                            )
+OPCODE(SMax32,                                              U32,            U32,            U32,                                                            )
+OPCODE(UMax32,                                              U32,            U32,            U32,                                                            )
+OPCODE(SLessThan,                                           U1,             U32,            U32,                                                            )
+OPCODE(ULessThan,                                           U1,             U32,            U32,                                                            )
+OPCODE(IEqual,                                              U1,             U32,            U32,                                                            )
+OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                                            )
+OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                                            )
+OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                                            )
+OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                                            )
+OPCODE(INotEqual,                                           U1,             U32,            U32,                                                            )
+OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                                            )
+OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                                            )
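
Signed and unsigned comparisons both take plain U32 bit patterns; the S* variants reinterpret them as two's-complement before comparing, which is the whole difference:

    #include <cstdint>

    // Same bits in, different interpretation.
    bool SLessThan(uint32_t lhs, uint32_t rhs) {
        return static_cast<int32_t>(lhs) < static_cast<int32_t>(rhs);
    }

    bool ULessThan(uint32_t lhs, uint32_t rhs) {
        return lhs < rhs;
    }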
 
 // Logical operations
-OPCODE(LogicalOr,                                           U1,             U1,             U1,                                             )
-OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                             )
-OPCODE(LogicalXor,                                          U1,             U1,             U1,                                             )
-OPCODE(LogicalNot,                                          U1,             U1,                                                             )
+OPCODE(LogicalOr,                                           U1,             U1,             U1,                                                             )
+OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                                             )
+OPCODE(LogicalXor,                                          U1,             U1,             U1,                                                             )
+OPCODE(LogicalNot,                                          U1,             U1,                                                                             )
 
 // Conversion operations
-OPCODE(ConvertS16F16,                                       U32,            F16,                                                            )
-OPCODE(ConvertS16F32,                                       U32,            F32,                                                            )
-OPCODE(ConvertS16F64,                                       U32,            F64,                                                            )
-OPCODE(ConvertS32F16,                                       U32,            F16,                                                            )
-OPCODE(ConvertS32F32,                                       U32,            F32,                                                            )
-OPCODE(ConvertS32F64,                                       U32,            F64,                                                            )
-OPCODE(ConvertS64F16,                                       U64,            F16,                                                            )
-OPCODE(ConvertS64F32,                                       U64,            F32,                                                            )
-OPCODE(ConvertS64F64,                                       U64,            F64,                                                            )
-OPCODE(ConvertU16F16,                                       U32,            F16,                                                            )
-OPCODE(ConvertU16F32,                                       U32,            F32,                                                            )
-OPCODE(ConvertU16F64,                                       U32,            F64,                                                            )
-OPCODE(ConvertU32F16,                                       U32,            F16,                                                            )
-OPCODE(ConvertU32F32,                                       U32,            F32,                                                            )
-OPCODE(ConvertU32F64,                                       U32,            F64,                                                            )
-OPCODE(ConvertU64F16,                                       U64,            F16,                                                            )
-OPCODE(ConvertU64F32,                                       U64,            F32,                                                            )
-OPCODE(ConvertU64F64,                                       U64,            F64,                                                            )
-OPCODE(ConvertU64U32,                                       U64,            U32,                                                            )
-OPCODE(ConvertU32U64,                                       U32,            U64,                                                            )
-OPCODE(ConvertF16F32,                                       F16,            F32,                                                            )
-OPCODE(ConvertF32F16,                                       F32,            F16,                                                            )
-OPCODE(ConvertF32F64,                                       F32,            F64,                                                            )
-OPCODE(ConvertF64F32,                                       F64,            F32,                                                            )
+OPCODE(ConvertS16F16,                                       U32,            F16,                                                                            )
+OPCODE(ConvertS16F32,                                       U32,            F32,                                                                            )
+OPCODE(ConvertS16F64,                                       U32,            F64,                                                                            )
+OPCODE(ConvertS32F16,                                       U32,            F16,                                                                            )
+OPCODE(ConvertS32F32,                                       U32,            F32,                                                                            )
+OPCODE(ConvertS32F64,                                       U32,            F64,                                                                            )
+OPCODE(ConvertS64F16,                                       U64,            F16,                                                                            )
+OPCODE(ConvertS64F32,                                       U64,            F32,                                                                            )
+OPCODE(ConvertS64F64,                                       U64,            F64,                                                                            )
+OPCODE(ConvertU16F16,                                       U32,            F16,                                                                            )
+OPCODE(ConvertU16F32,                                       U32,            F32,                                                                            )
+OPCODE(ConvertU16F64,                                       U32,            F64,                                                                            )
+OPCODE(ConvertU32F16,                                       U32,            F16,                                                                            )
+OPCODE(ConvertU32F32,                                       U32,            F32,                                                                            )
+OPCODE(ConvertU32F64,                                       U32,            F64,                                                                            )
+OPCODE(ConvertU64F16,                                       U64,            F16,                                                                            )
+OPCODE(ConvertU64F32,                                       U64,            F32,                                                                            )
+OPCODE(ConvertU64F64,                                       U64,            F64,                                                                            )
+OPCODE(ConvertU64U32,                                       U64,            U32,                                                                            )
+OPCODE(ConvertU32U64,                                       U32,            U64,                                                                            )
+OPCODE(ConvertF16F32,                                       F16,            F32,                                                                            )
+OPCODE(ConvertF32F16,                                       F32,            F16,                                                                            )
+OPCODE(ConvertF32F64,                                       F32,            F64,                                                                            )
+OPCODE(ConvertF64F32,                                       F64,            F32,                                                                            )
+OPCODE(ConvertF16S32,                                       F16,            U32,                                                                            )
+OPCODE(ConvertF16S64,                                       F16,            U64,                                                                            )
+OPCODE(ConvertF16U32,                                       F16,            U32,                                                                            )
+OPCODE(ConvertF16U64,                                       F16,            U64,                                                                            )
+OPCODE(ConvertF32S32,                                       F32,            U32,                                                                            )
+OPCODE(ConvertF32S64,                                       F32,            U64,                                                                            )
+OPCODE(ConvertF32U32,                                       F32,            U32,                                                                            )
+OPCODE(ConvertF32U64,                                       F32,            U64,                                                                            )
+OPCODE(ConvertF64S32,                                       F64,            U32,                                                                            )
+OPCODE(ConvertF64S64,                                       F64,            U64,                                                                            )
+OPCODE(ConvertF64U32,                                       F64,            U32,                                                                            )
+OPCODE(ConvertF64U64,                                       F64,            U64,                                                                            )
+
+// Image operations
+OPCODE(BindlessImageSampleImplicitLod,                      F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(BindlessImageSampleExplicitLod,                      F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(BindlessImageSampleDrefImplicitLod,                  F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+OPCODE(BindlessImageSampleDrefExplicitLod,                  F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+
+OPCODE(BoundImageSampleImplicitLod,                         F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(BoundImageSampleExplicitLod,                         F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(BoundImageSampleDrefImplicitLod,                     F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+OPCODE(BoundImageSampleDrefExplicitLod,                     F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+
+OPCODE(ImageSampleImplicitLod,                              F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(ImageSampleExplicitLod,                              F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
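The opcode table above is an X-macro list: opcodes.inc is included by several consumers, each defining OPCODE(name, result, args...) to generate a different artifact (enum entries, name tables, argument counts). A standalone sketch of the technique with a made-up three-row table; the real list and its consumers are far larger:

    #include <cstdio>

    // Illustrative three-row table in the spirit of opcodes.inc.
    #define OPCODE_LIST(X)                                                         \
        X(IAdd32, U32, U32, U32)                                                   \
        X(SLessThan, U1, U32, U32)                                                 \
        X(LogicalNot, U1, U1)

    // Consumer 1: generate the enumeration.
    enum class Opcode {
    #define OPCODE(name, ...) name,
        OPCODE_LIST(OPCODE)
    #undef OPCODE
    };

    // Consumer 2: generate a name lookup from the same table.
    const char* NameOf(Opcode op) {
        switch (op) {
    #define OPCODE(name, ...)                                                      \
        case Opcode::name:                                                         \
            return #name;
            OPCODE_LIST(OPCODE)
    #undef OPCODE
        }
        return "<invalid>";
    }

    int main() {
        std::printf("%s\n", NameOf(Opcode::SLessThan)); // prints SLessThan
    }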
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
index 771094eb9a..8fea05f7be 100644
--- a/src/shader_recompiler/frontend/ir/reg.h
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256;
     return reg + (-num);
 }
 
+[[nodiscard]] constexpr Reg operator++(Reg& reg) {
+    reg = reg + 1;
+    return reg;
+}
+
+[[nodiscard]] constexpr Reg operator++(Reg& reg, int) {
+    const Reg copy{reg};
+    reg = reg + 1;
+    return copy;
+}
+
 [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
     return static_cast<size_t>(reg);
 }
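The new increment operators let translator code consume consecutive registers with expressions such as v.F(reg++), which the TEX helpers introduced below rely on. A standalone sketch using a simplified Reg enum, without the range handling of the real reg.h:

    #include <cstdint>
    #include <cstdio>

    enum class Reg : uint64_t { R0, R1, R2 };

    constexpr Reg operator+(Reg reg, int num) {
        return static_cast<Reg>(static_cast<uint64_t>(reg) + static_cast<uint64_t>(num));
    }

    constexpr Reg operator++(Reg& reg) { // pre-increment
        reg = reg + 1;
        return reg;
    }

    constexpr Reg operator++(Reg& reg, int) { // post-increment
        const Reg copy{reg};
        reg = reg + 1;
        return copy;
    }

    int main() {
        Reg reg{Reg::R0};
        const Reg first{reg++};  // consumes R0, reg advances to R1
        const Reg second{reg++}; // consumes R1, reg advances to R2
        std::printf("%d %d %d\n", static_cast<int>(first), static_cast<int>(second),
                    static_cast<int>(reg)); // 0 1 2
    }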
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 9b7e1480be..3602883d6f 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -75,6 +75,7 @@ private:
         f64 imm_f64;
     };
 };
+static_assert(std::is_trivially_copyable_v<Value>);
 
 template <IR::Type type_>
 class TypedValue : public Value {
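The new static_assert pins down the invariant the union inside Value depends on: every alternative is trivially copyable, so a Value can be copied byte-wise and passed around cheaply. A reduced sketch with illustrative member names:

    #include <cstdint>
    #include <type_traits>

    enum class Type { U1, U32, F32 };

    struct SmallValue {
        Type type;
        union {
            bool imm_u1;
            uint32_t imm_u32;
            float imm_f32;
        };
    };

    // Holds only while every union alternative is trivially copyable; adding a
    // member with a non-trivial copy (e.g. std::string) fails at compile time.
    static_assert(std::is_trivially_copyable_v<SmallValue>);

    int main() {
        SmallValue value;
        value.type = Type::F32;
        value.imm_f32 = 1.0f;
        const SmallValue copy = value; // well-defined byte-wise copy
        return copy.type == Type::F32 ? 0 : 1;
    }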
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
index 5d0b91598e..f2a2ff331e 100644
--- a/src/shader_recompiler/frontend/maxwell/maxwell.inc
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -249,8 +249,8 @@ INST(SULD,         "SULD",           "1110 1011 000- ----")
 INST(SURED,        "SURED",          "1110 1011 010- ----")
 INST(SUST,         "SUST",           "1110 1011 001- ----")
 INST(SYNC,         "SYNC",           "1111 0000 1111 1---")
-INST(TEX,          "TEX",            "1100 00-- --11 1---")
-INST(TEX_b,        "TEX (b)",        "1101 1110 1011 1---")
+INST(TEX,          "TEX",            "1100 0--- ---- ----")
+INST(TEX_b,        "TEX (b)",        "1101 1110 10-- ----")
 INST(TEXS,         "TEXS",           "1101 -00- ---- ----")
 INST(TLD,          "TLD",            "1101 1100 --11 1---")
 INST(TLD_b,        "TLD (b)",        "1101 1101 --11 1---")
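The widened TEX/TEX_b rows are bit patterns in which '-' marks a don't-care bit. Decoding such a row amounts to compiling it into a mask/expected pair over the top 16 opcode bits and testing (opcode & mask) == expected; the sketch below is a simplified reading, not the frontend's actual table machinery:

    #include <cstdint>
    #include <cstdio>
    #include <string_view>

    struct Pattern {
        uint64_t mask;     // 1 where the row pins a bit
        uint64_t expected; // required value of the pinned bits
    };

    constexpr Pattern Compile(std::string_view bits) {
        Pattern pattern{};
        for (const char c : bits) {
            if (c == ' ') {
                continue;
            }
            pattern.mask <<= 1;
            pattern.expected <<= 1;
            if (c == '0' || c == '1') {
                pattern.mask |= 1;
                pattern.expected |= (c == '1') ? 1 : 0;
            }
        }
        return pattern;
    }

    constexpr bool Matches(Pattern pattern, uint16_t top_bits) {
        return (top_bits & pattern.mask) == pattern.expected;
    }

    int main() {
        constexpr Pattern tex_b{Compile("1101 1110 10-- ----")};
        std::printf("%d\n", Matches(tex_b, 0b1101'1110'1011'1000)); // 1
    }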
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index dbfc04f75e..b270bbccdb 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
         Optimization::SsaRewritePass(function.post_order_blocks);
     }
     Optimization::GlobalMemoryToStorageBufferPass(program);
+    Optimization::TexturePass(env, program);
     for (IR::Function& function : functions) {
         Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function);
         Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index fc6030e04e..ff429c1263 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -585,14 +585,6 @@ void TranslatorVisitor::SYNC(u64) {
     ThrowNotImplemented(Opcode::SYNC);
 }
 
-void TranslatorVisitor::TEX(u64) {
-    ThrowNotImplemented(Opcode::TEX);
-}
-
-void TranslatorVisitor::TEX_b(u64) {
-    ThrowNotImplemented(Opcode::TEX_b);
-}
-
 void TranslatorVisitor::TEXS(u64) {
     ThrowNotImplemented(Opcode::TEXS);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
new file mode 100644
index 0000000000..98d9f4c648
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
@@ -0,0 +1,232 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Blod : u64 {
+    None,
+    LZ,
+    LB,
+    LL,
+    INVALIDBLOD4,
+    INVALIDBLOD5,
+    LBA,
+    LLA,
+};
+
+enum class TextureType : u64 {
+    _1D,
+    ARRAY_1D,
+    _2D,
+    ARRAY_2D,
+    _3D,
+    ARRAY_3D,
+    CUBE,
+    ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type, bool dc) {
+    switch (type) {
+    case TextureType::_1D:
+        return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D;
+    case TextureType::ARRAY_1D:
+        return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D;
+    case TextureType::_2D:
+        return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D;
+    case TextureType::ARRAY_2D:
+        return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D;
+    case TextureType::_3D:
+        return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D;
+    case TextureType::ARRAY_3D:
+        throw NotImplementedException("3D array texture type");
+    case TextureType::CUBE:
+        return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube;
+    case TextureType::ARRAY_CUBE:
+        return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube;
+    }
+    throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+    const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }};
+    switch (type) {
+    case TextureType::_1D:
+        return v.F(reg);
+    case TextureType::ARRAY_1D:
+        return v.ir.CompositeConstruct(read_array(), v.F(reg + 1));
+    case TextureType::_2D:
+        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+    case TextureType::ARRAY_2D:
+        return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2));
+    case TextureType::_3D:
+        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+    case TextureType::ARRAY_3D:
+        throw NotImplementedException("3D array texture type");
+    case TextureType::CUBE:
+        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+    case TextureType::ARRAY_CUBE:
+        return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
+    }
+    throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
+    switch (blod) {
+    case Blod::None:
+        return v.ir.Imm32(0.0f);
+    case Blod::LZ:
+        return v.ir.Imm32(0.0f);
+    case Blod::LB:
+    case Blod::LL:
+    case Blod::LBA:
+    case Blod::LLA:
+        return v.F(reg++);
+    case Blod::INVALIDBLOD4:
+    case Blod::INVALIDBLOD5:
+        break;
+    }
+    throw NotImplementedException("Invalid blod {}", blod);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+    const IR::U32 value{v.X(reg++)};
+    switch (type) {
+    case TextureType::_1D:
+    case TextureType::ARRAY_1D:
+        return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4));
+    case TextureType::_2D:
+    case TextureType::ARRAY_2D:
+        return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
+                                       v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)));
+    case TextureType::_3D:
+    case TextureType::ARRAY_3D:
+        return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
+                                       v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)),
+                                       v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4)));
+    case TextureType::CUBE:
+    case TextureType::ARRAY_CUBE:
+        throw NotImplementedException("Illegal offset on CUBE sample");
+    }
+    throw NotImplementedException("Invalid texture type {}", type);
+}
+
+bool HasExplicitLod(Blod blod) {
+    switch (blod) {
+    case Blod::LL:
+    case Blod::LLA:
+    case Blod::LZ:
+        return true;
+    default:
+        return false;
+    }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
+          std::optional<u32> cbuf_offset) {
+    union {
+        u64 raw;
+        BitField<35, 1, u64> ndv;
+        BitField<49, 1, u64> nodep;
+        BitField<50, 1, u64> dc;
+        BitField<51, 3, IR::Pred> sparse_pred;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> coord_reg;
+        BitField<20, 8, IR::Reg> meta_reg;
+        BitField<28, 3, TextureType> type;
+        BitField<31, 4, u64> mask;
+    } const tex{insn};
+
+    if (lc) {
+        throw NotImplementedException("LC");
+    }
+    const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
+
+    IR::Reg meta_reg{tex.meta_reg};
+    IR::Value handle;
+    IR::Value offset;
+    IR::F32 dref;
+    IR::F32 lod_clamp;
+    if (cbuf_offset) {
+        handle = v.ir.Imm32(*cbuf_offset);
+    } else {
+        handle = v.X(meta_reg++);
+    }
+    const IR::F32 lod{MakeLod(v, meta_reg, blod)};
+    if (aoffi) {
+        offset = MakeOffset(v, meta_reg, tex.type);
+    }
+    if (tex.dc != 0) {
+        dref = v.F(meta_reg++);
+    }
+    IR::TextureInstInfo info{};
+    info.type.Assign(GetType(tex.type, tex.dc != 0));
+    info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
+    info.has_lod_clamp.Assign(lc ? 1 : 0);
+
+    const IR::Value sample{[&]() -> IR::Value {
+        if (tex.dc == 0) {
+            if (HasExplicitLod(blod)) {
+                return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info);
+            } else {
+                return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
+            }
+        }
+        if (HasExplicitLod(blod)) {
+            return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp,
+                                                   info);
+        } else {
+            return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
+                                                   info);
+        }
+    }()};
+
+    for (int element = 0; element < 4; ++element) {
+        if (((tex.mask >> element) & 1) == 0) {
+            continue;
+        }
+        IR::F32 value;
+        if (tex.dc != 0) {
+            value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
+        } else {
+            value = IR::F32{v.ir.CompositeExtract(sample, element)};
+        }
+        v.F(tex.dest_reg + element, value);
+    }
+    if (tex.sparse_pred != IR::Pred::PT) {
+        v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEX(u64 insn) {
+    union {
+        u64 raw;
+        BitField<54, 1, u64> aoffi;
+        BitField<55, 3, Blod> blod;
+        BitField<58, 1, u64> lc;
+        BitField<36, 13, u64> cbuf_offset;
+    } const tex{insn};
+
+    Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset));
+}
+
+void TranslatorVisitor::TEX_b(u64 insn) {
+    union {
+        u64 raw;
+        BitField<36, 1, u64> aoffi;
+        BitField<37, 3, Blod> blod;
+        BitField<40, 1, u64> lc;
+    } const tex{insn};
+
+    Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
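The unions in Impl(), TEX() and TEX_b() overlay BitField views (yuzu's common/bit_field.h helper) on the raw 64-bit instruction word; each view is equivalent to a shift-and-mask extraction. A standalone sketch with explicit shifts, field positions copied from the Impl() union:

    #include <cstdint>
    #include <cstdio>

    constexpr uint64_t Bits(uint64_t insn, unsigned pos, unsigned count) {
        return (insn >> pos) & ((uint64_t{1} << count) - 1);
    }

    int main() {
        // Synthetic word: dest_reg=3, coord_reg=8, type=2 (_2D), mask=0xf, dc=1
        const uint64_t insn = (uint64_t{1} << 50) | (uint64_t{0xf} << 31) |
                              (uint64_t{2} << 28) | (uint64_t{8} << 8) | uint64_t{3};
        std::printf("dest=%u coord=%u meta=%u type=%u mask=%u dc=%u\n",
                    static_cast<unsigned>(Bits(insn, 0, 8)),   // dest_reg
                    static_cast<unsigned>(Bits(insn, 8, 8)),   // coord_reg
                    static_cast<unsigned>(Bits(insn, 20, 8)),  // meta_reg
                    static_cast<unsigned>(Bits(insn, 28, 3)),  // TextureType
                    static_cast<unsigned>(Bits(insn, 31, 4)),  // component mask
                    static_cast<unsigned>(Bits(insn, 50, 1))); // depth compare
    }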
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 6662ef4cdc..960beadd43 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) {
             throw NotImplementedException("Constant buffer with non-immediate index");
         }
         break;
+    case IR::Opcode::BindlessImageSampleImplicitLod:
+    case IR::Opcode::BindlessImageSampleExplicitLod:
+    case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+    case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+    case IR::Opcode::BoundImageSampleImplicitLod:
+    case IR::Opcode::BoundImageSampleExplicitLod:
+    case IR::Opcode::BoundImageSampleDrefImplicitLod:
+    case IR::Opcode::BoundImageSampleDrefExplicitLod:
+    case IR::Opcode::ImageSampleImplicitLod:
+    case IR::Opcode::ImageSampleExplicitLod:
+    case IR::Opcode::ImageSampleDrefImplicitLod:
+    case IR::Opcode::ImageSampleDrefExplicitLod: {
+        const TextureType type{inst.Flags<IR::TextureInstInfo>().type};
+        info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D ||
+                                type == TextureType::Shadow1D || type == TextureType::ShadowArray1D;
+        info.uses_sparse_residency |=
+            inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
+        break;
+    }
     default:
         break;
     }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 965e521352..2625c0bb2b 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -226,6 +226,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
     }
     // Reversed loops are more likely to find the right result
     for (size_t arg = inst->NumArgs(); arg--;) {
+        IR::Block* inst_block{block};
         if (inst->Opcode() == IR::Opcode::Phi) {
             // If we are going through a phi node, mark the current block as visited
             visited.insert(block);
@@ -235,15 +236,11 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
                 // Already visited, skip
                 continue;
             }
-            const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)};
-            if (storage_buffer) {
-                return *storage_buffer;
-            }
-        } else {
-            const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)};
-            if (storage_buffer) {
-                return *storage_buffer;
-            }
+            inst_block = phi_block;
+        }
+        const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)};
+        if (storage_buffer) {
+            return *storage_buffer;
         }
     }
     return std::nullopt;
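Both Track() functions (here and in the texture pass below) use the same guarded recursion: before following a phi operand the current block is recorded in a visited set, so cyclic control flow cannot recurse forever. A reduced sketch with hypothetical types:

    #include <optional>
    #include <set>
    #include <vector>

    struct Block {
        std::optional<int> payload;      // stands in for a GetCbuf match
        std::vector<Block*> phi_sources; // stands in for phi operands
    };

    std::optional<int> Track(Block* block, std::set<Block*>& visited) {
        if (block->payload) {
            return block->payload;
        }
        visited.insert(block); // mark before recursing
        for (Block* const source : block->phi_sources) {
            if (visited.contains(source)) {
                continue; // already visited, skip to avoid looping forever
            }
            if (const std::optional result{Track(source, visited)}) {
                return result;
            }
        }
        return std::nullopt;
    }

    int main() {
        Block a{}, b{}, c{std::optional{42}, {}};
        a.phi_sources = {&b, &c};
        b.phi_sources = {&a}; // cycle back into a
        std::set<Block*> visited;
        return Track(&a, visited).value_or(-1) == 42 ? 0 : 1;
    }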
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 38106308cb..3b7e7306bb 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -6,6 +6,7 @@
 
 #include <span>
 
+#include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/function.h"
 #include "shader_recompiler/frontend/ir/program.h"
@@ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program);
 void IdentityRemovalPass(IR::Function& function);
 void LowerFp16ToFp32(IR::Program& program);
 void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
+void TexturePass(Environment& env, IR::Program& program);
 void VerificationPass(const IR::Function& function);
 
 } // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
new file mode 100644
index 0000000000..80e4ad6a97
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -0,0 +1,200 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/ir_opt/passes.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+namespace {
+struct ConstBufferAddr {
+    u32 index;
+    u32 offset;
+};
+
+struct TextureInst {
+    ConstBufferAddr cbuf;
+    IR::Inst* inst;
+    IR::Block* block;
+};
+
+using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
+
+using VisitedBlocks = boost::container::flat_set<IR::Block*, std::less<IR::Block*>,
+                                                 boost::container::small_vector<IR::Block*, 2>>;
+
+IR::Opcode IndexedInstruction(const IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::BindlessImageSampleImplicitLod:
+    case IR::Opcode::BoundImageSampleImplicitLod:
+        return IR::Opcode::ImageSampleImplicitLod;
+    case IR::Opcode::BoundImageSampleExplicitLod:
+    case IR::Opcode::BindlessImageSampleExplicitLod:
+        return IR::Opcode::ImageSampleExplicitLod;
+    case IR::Opcode::BoundImageSampleDrefImplicitLod:
+    case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+        return IR::Opcode::ImageSampleDrefImplicitLod;
+    case IR::Opcode::BoundImageSampleDrefExplicitLod:
+    case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+        return IR::Opcode::ImageSampleDrefExplicitLod;
+    default:
+        return IR::Opcode::Void;
+    }
+}
+
+bool IsBindless(const IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::BindlessImageSampleImplicitLod:
+    case IR::Opcode::BindlessImageSampleExplicitLod:
+    case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+    case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+        return true;
+    case IR::Opcode::BoundImageSampleImplicitLod:
+    case IR::Opcode::BoundImageSampleExplicitLod:
+    case IR::Opcode::BoundImageSampleDrefImplicitLod:
+    case IR::Opcode::BoundImageSampleDrefExplicitLod:
+        return false;
+    default:
+        throw InvalidArgument("Invalid opcode {}", inst.Opcode());
+    }
+}
+
+bool IsTextureInstruction(const IR::Inst& inst) {
+    return IndexedInstruction(inst) != IR::Opcode::Void;
+}
+
+std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value,
+                                     VisitedBlocks& visited) {
+    if (value.IsImmediate()) {
+        // Immediates can't be a constant buffer
+        return std::nullopt;
+    }
+    const IR::Inst* const inst{value.InstRecursive()};
+    if (inst->Opcode() == IR::Opcode::GetCbuf) {
+        const IR::Value index{inst->Arg(0)};
+        const IR::Value offset{inst->Arg(1)};
+        if (!index.IsImmediate()) {
+            // Reading a bindless texture from a variable index is valid,
+            // but it is not supported here at the moment
+            return std::nullopt;
+        }
+        if (!offset.IsImmediate()) {
+            // TODO: Support arrays of textures
+            return std::nullopt;
+        }
+        return ConstBufferAddr{
+            .index{index.U32()},
+            .offset{offset.U32()},
+        };
+    }
+    // Reversed loops are more likely to find the right result
+    for (size_t arg = inst->NumArgs(); arg--;) {
+        IR::Block* inst_block{block};
+        if (inst->Opcode() == IR::Opcode::Phi) {
+            // If we are going through a phi node, mark the current block as visited
+            visited.insert(block);
+            // and skip already visited blocks to avoid looping forever
+            IR::Block* const phi_block{inst->PhiBlock(arg)};
+            if (visited.contains(phi_block)) {
+                // Already visited, skip
+                continue;
+            }
+            inst_block = phi_block;
+        }
+        const std::optional const_buffer{Track(inst_block, inst->Arg(arg), visited)};
+        if (const_buffer) {
+            return *const_buffer;
+        }
+    }
+    return std::nullopt;
+}
+
+TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
+    ConstBufferAddr addr;
+    if (IsBindless(inst)) {
+        VisitedBlocks visited;
+        const std::optional<ConstBufferAddr> track_addr{Track(block, IR::Value{&inst}, visited)};
+        if (!track_addr) {
+            throw NotImplementedException("Failed to track bindless texture constant buffer");
+        }
+        addr = *track_addr;
+    } else {
+        addr = ConstBufferAddr{
+            .index{env.TextureBoundBuffer()},
+            .offset{inst.Arg(0).U32()},
+        };
+    }
+    return TextureInst{
+        .cbuf{addr},
+        .inst{&inst},
+        .block{block},
+    };
+}
+
+class Descriptors {
+public:
+    explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {}
+
+    u32 Add(const TextureDescriptor& descriptor) {
+        // TODO: Handle arrays
+        auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) {
+            return descriptor.cbuf_index == existing.cbuf_index &&
+                   descriptor.cbuf_offset == existing.cbuf_offset &&
+                   descriptor.type == existing.type;
+        })};
+        if (it != descriptors.end()) {
+            return static_cast<u32>(std::distance(descriptors.begin(), it));
+        }
+        descriptors.push_back(descriptor);
+        return static_cast<u32>(descriptors.size()) - 1;
+    }
+
+private:
+    TextureDescriptors& descriptors;
+};
+} // Anonymous namespace
+
+void TexturePass(Environment& env, IR::Program& program) {
+    TextureInstVector to_replace;
+    for (IR::Function& function : program.functions) {
+        for (IR::Block* const block : function.post_order_blocks) {
+            for (IR::Inst& inst : block->Instructions()) {
+                if (!IsTextureInstruction(inst)) {
+                    continue;
+                }
+                to_replace.push_back(MakeInst(env, block, inst));
+            }
+        }
+    }
+    // Sort instructions to visit textures by constant buffer index, then by offset
+    std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
+        return lhs.cbuf.offset < rhs.cbuf.offset;
+    });
+    std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
+        return lhs.cbuf.index < rhs.cbuf.index;
+    });
+    Descriptors descriptors{program.info.texture_descriptors};
+    for (TextureInst& texture_inst : to_replace) {
+        // TODO: Handle arrays
+        IR::Inst* const inst{texture_inst.inst};
+        const u32 index{descriptors.Add(TextureDescriptor{
+            .type{inst->Flags<IR::TextureInstInfo>().type},
+            .cbuf_index{texture_inst.cbuf.index},
+            .cbuf_offset{texture_inst.cbuf.offset},
+            .count{1},
+        })};
+        inst->ReplaceOpcode(IndexedInstruction(*inst));
+        inst->SetArg(0, IR::Value{index});
+    }
+}
+
+} // namespace Shader::Optimization
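TexturePass orders the gathered instructions with a plain sort by offset followed by a stable sort by index; because the second pass is stable it preserves the offset order within each index, giving the same (index, offset) lexicographic order a single comparator would. A standalone check of that equivalence:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct CbufKey {
        unsigned index;
        unsigned offset;
    };

    int main() {
        std::vector<CbufKey> two_pass{{1, 8}, {0, 4}, {1, 0}, {0, 12}};
        std::sort(two_pass.begin(), two_pass.end(),
                  [](const CbufKey& l, const CbufKey& r) { return l.offset < r.offset; });
        std::stable_sort(two_pass.begin(), two_pass.end(),
                         [](const CbufKey& l, const CbufKey& r) { return l.index < r.index; });

        std::vector<CbufKey> one_pass{{1, 8}, {0, 4}, {1, 0}, {0, 12}};
        std::sort(one_pass.begin(), one_pass.end(), [](const CbufKey& l, const CbufKey& r) {
            return l.index != r.index ? l.index < r.index : l.offset < r.offset;
        });

        for (size_t i = 0; i < two_pass.size(); ++i) {
            std::printf("(%u,%u) == (%u,%u)\n", two_pass[i].index, two_pass[i].offset,
                        one_pass[i].index, one_pass[i].offset);
        }
    }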
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 8766bf13e9..103a2f0b43 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -8,25 +8,51 @@
 
 #include "common/common_types.h"
 
+#include <boost/container/small_vector.hpp>
 #include <boost/container/static_vector.hpp>
 
 namespace Shader {
 
+enum class TextureType : u32 {
+    Color1D,
+    ColorArray1D,
+    Color2D,
+    ColorArray2D,
+    Color3D,
+    ColorCube,
+    ColorArrayCube,
+    Shadow1D,
+    ShadowArray1D,
+    Shadow2D,
+    ShadowArray2D,
+    Shadow3D,
+    ShadowCube,
+    ShadowArrayCube,
+};
+
+struct TextureDescriptor {
+    TextureType type;
+    u32 cbuf_index;
+    u32 cbuf_offset;
+    u32 count;
+};
+using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
+
+struct ConstantBufferDescriptor {
+    u32 index;
+    u32 count;
+};
+
+struct StorageBufferDescriptor {
+    u32 cbuf_index;
+    u32 cbuf_offset;
+    u32 count;
+};
+
 struct Info {
     static constexpr size_t MAX_CBUFS{18};
     static constexpr size_t MAX_SSBOS{16};
 
-    struct ConstantBufferDescriptor {
-        u32 index;
-        u32 count;
-    };
-
-    struct StorageBufferDescriptor {
-        u32 cbuf_index;
-        u32 cbuf_offset;
-        u32 count;
-    };
-
     bool uses_workgroup_id{};
     bool uses_local_invocation_id{};
     bool uses_fp16{};
@@ -35,12 +61,16 @@ struct Info {
     bool uses_fp16_denorms_preserve{};
     bool uses_fp32_denorms_flush{};
     bool uses_fp32_denorms_preserve{};
+    bool uses_image_1d{};
+    bool uses_sampled_1d{};
+    bool uses_sparse_residency{};
 
     u32 constant_buffer_mask{};
 
     boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
         constant_buffer_descriptors;
     boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
+    TextureDescriptors texture_descriptors;
 };
 
 } // namespace Shader
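TextureDescriptors is a small_vector with 12 inline slots, so the common case of a handful of texture descriptors per shader never touches the heap. A minimal illustration (requires Boost, which the project already links):

    #include <cstdio>

    #include <boost/container/small_vector.hpp>

    int main() {
        boost::container::small_vector<int, 12> descriptors;
        for (int i = 0; i < 12; ++i) {
            descriptors.push_back(i); // stays in inline storage, no allocation
        }
        descriptors.push_back(12); // the 13th element spills to the heap
        std::printf("%zu\n", descriptors.size()); // 13
    }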
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index a658a3276b..ef8bef6ffc 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -40,6 +40,16 @@ vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Sh
         });
         ++binding;
     }
+    for (const auto& desc : info.texture_descriptors) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        });
+        ++binding;
+    }
     return device.GetLogical().CreateDescriptorSetLayout({
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .pNext = nullptr,
@@ -79,6 +89,18 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
         ++binding;
         offset += sizeof(DescriptorUpdateEntry);
     }
+    for (const auto& desc : info.texture_descriptors) {
+        entries.push_back({
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
     return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
         .pNext = nullptr,
@@ -92,6 +114,44 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
         .set = 0,
     });
 }
+
+struct TextureHandle {
+    explicit TextureHandle(u32 data, bool via_header_index) {
+        const Tegra::Texture::TextureHandle handle{data};
+        image = handle.tic_id;
+        sampler = via_header_index ? image : handle.tsc_id.Value();
+    }
+
+    u32 image;
+    u32 sampler;
+};
+
+VideoCommon::ImageViewType CastType(Shader::TextureType type) {
+    switch (type) {
+    case Shader::TextureType::Color1D:
+    case Shader::TextureType::Shadow1D:
+        return VideoCommon::ImageViewType::e1D;
+    case Shader::TextureType::ColorArray1D:
+    case Shader::TextureType::ShadowArray1D:
+        return VideoCommon::ImageViewType::e1DArray;
+    case Shader::TextureType::Color2D:
+    case Shader::TextureType::Shadow2D:
+        return VideoCommon::ImageViewType::e2D;
+    case Shader::TextureType::ColorArray2D:
+    case Shader::TextureType::ShadowArray2D:
+        return VideoCommon::ImageViewType::e2DArray;
+    case Shader::TextureType::Color3D:
+    case Shader::TextureType::Shadow3D:
+        return VideoCommon::ImageViewType::e3D;
+    case Shader::TextureType::ColorCube:
+    case Shader::TextureType::ShadowCube:
+        return VideoCommon::ImageViewType::Cube;
+    case Shader::TextureType::ColorArrayCube:
+    case Shader::TextureType::ShadowArrayCube:
+        return VideoCommon::ImageViewType::CubeArray;
+    }
+    UNREACHABLE_MSG("Invalid texture type {}", type);
+}
 } // Anonymous namespace
 
 ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
@@ -143,6 +203,47 @@ void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
     buffer_cache.BindHostComputeBuffers();
 }
 
+void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute,
+                                            Tegra::MemoryManager& gpu_memory,
+                                            TextureCache& texture_cache) {
+    texture_cache.SynchronizeComputeDescriptors();
+
+    static constexpr size_t max_elements = 64;
+    std::array<ImageId, max_elements> image_view_ids;
+    boost::container::static_vector<u32, max_elements> image_view_indices;
+    boost::container::static_vector<VkSampler, max_elements> sampler_handles;
+
+    const auto& launch_desc{kepler_compute.launch_description};
+    const auto& cbufs{launch_desc.const_buffer_config};
+    const bool via_header_index{launch_desc.linked_tsc};
+    for (const auto& desc : info.texture_descriptors) {
+        const u32 cbuf_index{desc.cbuf_index};
+        const u32 cbuf_offset{desc.cbuf_offset};
+        ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
+
+        const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset};
+        const u32 raw_handle{gpu_memory.Read<u32>(addr)};
+
+        const TextureHandle handle(raw_handle, via_header_index);
+        image_view_indices.push_back(handle.image);
+
+        Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
+        sampler_handles.push_back(sampler->Handle());
+    }
+
+    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+    texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+    size_t index{};
+    for (const auto& desc : info.texture_descriptors) {
+        const VkSampler vk_sampler{sampler_handles[index]};
+        ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
+        const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))};
+        update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler);
+        ++index;
+    }
+}
+
 VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
     const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
     update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
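TextureHandle splits the raw 32-bit word read from the constant buffer into an image (TIC) index and a sampler (TSC) index; when the shader is linked against the TSC header (via_header_index) both indices coincide. The sketch below assumes the usual Maxwell split of a 20-bit TIC index and a 12-bit TSC index, which is what Tegra::Texture::TextureHandle models:

    #include <cstdint>
    #include <cstdio>

    struct UnpackedHandle {
        uint32_t image;   // TIC index
        uint32_t sampler; // TSC index
    };

    constexpr UnpackedHandle Unpack(uint32_t raw, bool via_header_index) {
        const uint32_t tic = raw & 0xfffff;       // bits 0-19 (assumed layout)
        const uint32_t tsc = (raw >> 20) & 0xfff; // bits 20-31 (assumed layout)
        return {tic, via_header_index ? tic : tsc};
    }

    int main() {
        const UnpackedHandle handle{Unpack(0x00300007, false)};
        std::printf("image=%u sampler=%u\n", handle.image, handle.sampler); // 7, 3
    }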
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index dc045d5245..08d73a2a4b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -6,9 +6,11 @@
 
 #include "common/common_types.h"
 #include "shader_recompiler/shader_info.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_pipeline.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
@@ -30,6 +32,8 @@ public:
     ComputePipeline(const ComputePipeline&) = delete;
 
     void ConfigureBufferCache(BufferCache& buffer_cache);
+    void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute,
+                               Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache);
 
     [[nodiscard]] VkDescriptorSet UpdateDescriptorSet();
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 2497c2385f..bcb7dd2eb4 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -76,6 +76,10 @@ public:
         return gpu_memory.Read<u64>(program_base + address);
     }
 
+    u32 TextureBoundBuffer() override {
+        return kepler_compute.regs.tex_cb_index;
+    }
+
     std::array<u32, 3> WorkgroupSize() override {
         const auto& qmd{kepler_compute.launch_description};
         return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1b662f9f3a..c94419d29c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -241,9 +241,10 @@ void RasterizerVulkan::DispatchCompute() {
     if (!pipeline) {
         return;
     }
-    std::scoped_lock lock{buffer_cache.mutex};
+    std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
     update_descriptor_queue.Acquire();
     pipeline->ConfigureBufferCache(buffer_cache);
+    pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache);
     const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()};
 
     const auto& qmd{kepler_compute.launch_description};
-- 
cgit v1.2.3-70-g09d2
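The DispatchCompute change in this patch takes both cache mutexes through a single std::scoped_lock, which applies a deadlock-avoidance algorithm, so the acquisition order does not have to match other call sites. A standalone sketch:

    #include <mutex>
    #include <thread>

    std::mutex texture_mutex;
    std::mutex buffer_mutex;

    void DispatchLike() {
        // Locks both mutexes atomically with deadlock avoidance
        std::scoped_lock lock{texture_mutex, buffer_mutex};
        // ... configure texture and buffer caches under both locks ...
    }

    void OtherCallSite() {
        std::scoped_lock lock{buffer_mutex, texture_mutex}; // reversed order is safe
    }

    int main() {
        std::thread worker{OtherCallSite};
        DispatchLike();
        worker.join();
    }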


From 8dd0acfaeba9396fb5c1e142a431a2a29f345855 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 17 Mar 2021 01:30:23 -0300
Subject: shader: Fix instruction transitions in and out of Phi

---
 .../frontend/ir/microinstruction.cpp                 | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 88e186f215..5946105d2f 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -182,7 +182,7 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
 
 void Inst::Invalidate() {
     ClearArgs();
-    op = Opcode::Void;
+    ReplaceOpcode(Opcode::Void);
 }
 
 void Inst::ClearArgs() {
@@ -206,20 +206,22 @@ void Inst::ClearArgs() {
 
 void Inst::ReplaceUsesWith(Value replacement) {
     Invalidate();
-
-    op = Opcode::Identity;
-
+    ReplaceOpcode(Opcode::Identity);
     if (!replacement.IsImmediate()) {
         Use(replacement);
     }
-    if (op == Opcode::Phi) {
-        phi_args[0].second = replacement;
-    } else {
-        args[0] = replacement;
-    }
+    args[0] = replacement;
 }
 
 void Inst::ReplaceOpcode(IR::Opcode opcode) {
+    if (opcode == IR::Opcode::Phi) {
+        throw LogicError("Cannot transition into Phi");
+    }
+    if (op == Opcode::Phi) {
+        // Transition out of phi arguments into non-phi
+        std::destroy_at(&phi_args);
+        std::construct_at(&args);
+    }
     op = opcode;
 }
 
-- 
cgit v1.2.3-70-g09d2
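The ReplaceOpcode() change in this patch is the standard pattern for switching the active member of a union whose alternatives have non-trivial types: std::destroy_at ends the lifetime of the old member and std::construct_at begins the new one. A reduced sketch with a hypothetical Node standing in for Inst:

    #include <array>
    #include <memory>
    #include <vector>

    struct Node {
        bool is_phi;
        union {
            std::array<int, 4> args;
            std::vector<int> phi_args;
        };

        Node() : is_phi{true}, phi_args{} {}

        void BecomePlain() {
            if (is_phi) {
                std::destroy_at(&phi_args); // end the vector's lifetime
                std::construct_at(&args);   // begin the array's lifetime
                is_phi = false;
            }
        }

        ~Node() {
            if (is_phi) {
                std::destroy_at(&phi_args);
            }
        }
    };

    int main() {
        Node node;
        node.phi_args.push_back(1);
        node.BecomePlain(); // transition out of phi arguments
        node.args[0] = 42;
        return node.args[0] == 42 ? 0 : 1;
    }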


From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 19 Mar 2021 19:28:31 -0300
Subject: shader: Add partial rasterizer integration

---
 src/shader_recompiler/CMakeLists.txt               |   4 +-
 .../backend/spirv/emit_context.cpp                 |  64 ++-
 src/shader_recompiler/backend/spirv/emit_context.h |  18 +-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  44 +-
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  18 +-
 .../backend/spirv/emit_spirv_context_get_set.cpp   |  55 ++-
 .../backend/spirv/emit_spirv_control_flow.cpp      |  23 +-
 src/shader_recompiler/environment.h                |  14 +
 src/shader_recompiler/frontend/ir/attribute.cpp    |   2 +-
 src/shader_recompiler/frontend/ir/attribute.h      |   2 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  14 +
 src/shader_recompiler/frontend/ir/ir_emitter.h     |   4 +
 .../frontend/ir/microinstruction.cpp               |   3 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |  11 +-
 src/shader_recompiler/frontend/ir/program.h        |   2 +
 src/shader_recompiler/frontend/ir/reg.h            |   4 +-
 .../frontend/maxwell/control_flow.cpp              |  31 +-
 .../frontend/maxwell/control_flow.h                |   3 +-
 src/shader_recompiler/frontend/maxwell/program.cpp |   1 +
 .../frontend/maxwell/structured_control_flow.cpp   |  18 +
 .../frontend/maxwell/translate/impl/exit.cpp       |  15 -
 .../maxwell/translate/impl/exit_program.cpp        |  43 ++
 .../frontend/maxwell/translate/impl/impl.h         |   4 +-
 .../translate/impl/load_store_attribute.cpp        |  86 +++-
 .../maxwell/translate/impl/not_implemented.cpp     |  16 +-
 .../maxwell/translate/impl/texture_fetch.cpp       |   2 +-
 .../translate/impl/texture_fetch_swizzled.cpp      |   2 +-
 .../ir_opt/collect_shader_info_pass.cpp            |  60 ++-
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |   2 +-
 src/shader_recompiler/program_header.h             | 143 +++++++
 src/shader_recompiler/recompiler.cpp               |  28 --
 src/shader_recompiler/recompiler.h                 |  20 -
 src/shader_recompiler/shader_info.h                |  10 +
 src/shader_recompiler/stage.h                      |  19 +
 src/video_core/CMakeLists.txt                      |   6 +-
 .../renderer_vulkan/fixed_pipeline_state.cpp       |   4 +
 .../renderer_vulkan/fixed_pipeline_state.h         |   9 +-
 src/video_core/renderer_vulkan/maxwell_to_vk.cpp   |  24 ++
 src/video_core/renderer_vulkan/maxwell_to_vk.h     |   2 +
 src/video_core/renderer_vulkan/pipeline_helper.h   | 162 ++++++++
 .../renderer_vulkan/vk_compute_pipeline.cpp        | 209 ++--------
 .../renderer_vulkan/vk_compute_pipeline.h          |   3 +-
 .../renderer_vulkan/vk_graphics_pipeline.cpp       | 445 +++++++++++++++++++++
 .../renderer_vulkan/vk_graphics_pipeline.h         |  66 +++
 src/video_core/renderer_vulkan/vk_pipeline.h       |  36 --
 .../renderer_vulkan/vk_pipeline_cache.cpp          | 346 ++++++++++++----
 src/video_core/renderer_vulkan/vk_pipeline_cache.h |  82 +++-
 src/video_core/renderer_vulkan/vk_rasterizer.cpp   |  47 ++-
 src/video_core/renderer_vulkan/vk_rasterizer.h     |   2 +
 .../renderer_vulkan/vk_render_pass_cache.cpp       | 100 +++++
 .../renderer_vulkan/vk_render_pass_cache.h         |  53 +++
 .../renderer_vulkan/vk_texture_cache.cpp           |  68 +---
 src/video_core/renderer_vulkan/vk_texture_cache.h  |  29 +-
 src/video_core/vulkan_common/vulkan_device.cpp     |  15 +
 54 files changed, 1927 insertions(+), 566 deletions(-)
 delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
 create mode 100644 src/shader_recompiler/program_header.h
 delete mode 100644 src/shader_recompiler/recompiler.cpp
 delete mode 100644 src/shader_recompiler/recompiler.h
 create mode 100644 src/shader_recompiler/stage.h
 create mode 100644 src/video_core/renderer_vulkan/pipeline_helper.h
 create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h
 delete mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h
 create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.h

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index b870e99378..31c3941064 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/common_funcs.h
     frontend/maxwell/translate/impl/condition_code_set.cpp
     frontend/maxwell/translate/impl/double_add.cpp
+    frontend/maxwell/translate/impl/exit_program.cpp
     frontend/maxwell/translate/impl/find_leading_one.cpp
     frontend/maxwell/translate/impl/floating_point_add.cpp
     frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -121,9 +122,8 @@ add_library(shader_recompiler STATIC
     ir_opt/texture_pass.cpp
     ir_opt/verification_pass.cpp
     object_pool.h
+    program_header.h
     profile.h
-    recompiler.cpp
-    recompiler.h
     shader_info.h
 )
 
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 204389d749..6c79b611bf 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -62,18 +62,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
     }
 }
 
-EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
+EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
     : Sirit::Module(0x00010000), profile{profile_} {
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
-    DefineSpecialVariables(program.info);
-
-    u32 binding{};
+    DefineInterfaces(program.info, program.stage);
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
     DefineTextures(program.info, binding);
-
     DefineLabels(program);
 }
 
@@ -96,6 +93,8 @@ Id EmitContext::Def(const IR::Value& value) {
         return Constant(F32[1], value.F32());
     case IR::Type::F64:
         return Constant(F64[1], value.F64());
+    case IR::Type::Label:
+        return value.Label()->Definition<Id>();
     default:
         throw NotImplementedException("Immediate type {}", value.Type());
     }
@@ -109,6 +108,9 @@ void EmitContext::DefineCommonTypes(const Info& info) {
     F32.Define(*this, TypeFloat(32), "f32");
     U32.Define(*this, TypeInt(32, false), "u32");
 
+    input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
+    output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
+
     if (info.uses_int8) {
         AddCapability(spv::Capability::Int8);
         U8 = Name(TypeInt(8, false), "u8");
@@ -139,15 +141,20 @@ void EmitContext::DefineCommonConstants() {
     u32_zero_value = Constant(U32[1], 0U);
 }
 
-void EmitContext::DefineSpecialVariables(const Info& info) {
-    const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) {
-        const Id pointer_type{TypePointer(storage_class, type)};
-        const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)};
-        Decorate(id, spv::Decoration::BuiltIn, builtin);
-        return id;
-    }};
+void EmitContext::DefineInterfaces(const Info& info, Stage stage) {
+    const auto define{
+        [this](Id type, std::optional<spv::BuiltIn> builtin, spv::StorageClass storage_class) {
+            const Id pointer_type{TypePointer(storage_class, type)};
+            const Id id{AddGlobalVariable(pointer_type, storage_class)};
+            if (builtin) {
+                Decorate(id, spv::Decoration::BuiltIn, *builtin);
+            }
+            interfaces.push_back(id);
+            return id;
+        }};
     using namespace std::placeholders;
     const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)};
+    const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)};
 
     if (info.uses_workgroup_id) {
         workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId);
@@ -155,6 +162,39 @@ void EmitContext::DefineSpecialVariables(const Info& info) {
     if (info.uses_local_invocation_id) {
         local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId);
     }
+    if (info.loads_position) {
+        const bool is_fragment{stage == Stage::Fragment};
+        const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position};
+        input_position = define_input(F32[4], built_in);
+    }
+    for (size_t i = 0; i < info.loads_generics.size(); ++i) {
+        if (info.loads_generics[i]) {
+            // FIXME: Declare size from input
+            input_generics[i] = define_input(F32[4], std::nullopt);
+            Decorate(input_generics[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(input_generics[i], fmt::format("in_attr{}", i));
+        }
+    }
+    if (info.stores_position) {
+        output_position = define_output(F32[4], spv::BuiltIn::Position);
+    }
+    for (size_t i = 0; i < info.stores_generics.size(); ++i) {
+        if (info.stores_generics[i]) {
+            output_generics[i] = define_output(F32[4], std::nullopt);
+            Decorate(output_generics[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(output_generics[i], fmt::format("out_attr{}", i));
+        }
+    }
+    if (stage == Stage::Fragment) {
+        for (size_t i = 0; i < frag_color.size(); ++i) {
+            if (!info.stores_frag_color[i]) {
+                continue;
+            }
+            frag_color[i] = define_output(F32[4], std::nullopt);
+            Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(frag_color[i], fmt::format("frag_color{}", i));
+        }
+    }
 }
 
 void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
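
Note: every input and output global defined above is also recorded in `interfaces`, so the
entry point can enumerate it later. Roughly, a generic input such as in_attr0 expands to the
following (a sketch using the same Sirit calls as the lambda above; names are illustrative):

    const Id pointer_type{ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4])};
    const Id in_attr0{ctx.AddGlobalVariable(pointer_type, spv::StorageClass::Input)};
    ctx.Decorate(in_attr0, spv::Decoration::Location, 0U); // generic i -> Location i
    ctx.interfaces.push_back(in_attr0);                    // consumed by AddEntryPoint later
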
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 35eca258a8..2d7961ac3b 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -46,7 +46,7 @@ struct UniformDefinitions {
 
 class EmitContext final : public Sirit::Module {
 public:
-    explicit EmitContext(const Profile& profile, IR::Program& program);
+    explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding);
     ~EmitContext();
 
     [[nodiscard]] Id Def(const IR::Value& value);
@@ -71,6 +71,9 @@ public:
 
     UniformDefinitions uniform_types;
 
+    Id input_f32{};
+    Id output_f32{};
+
     Id storage_u32{};
 
     std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
@@ -80,10 +83,21 @@ public:
     Id workgroup_id{};
     Id local_invocation_id{};
 
+    Id input_position{};
+    std::array<Id, 32> input_generics{};
+
+    Id output_position{};
+    std::array<Id, 32> output_generics{};
+
+    std::array<Id, 8> frag_color{};
+    Id frag_depth{};
+
+    std::vector<Id> interfaces;
+
 private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
-    void DefineSpecialVariables(const Info& info);
+    void DefineInterfaces(const Info& info, Stage stage);
     void DefineConstantBuffers(const Info& info, u32& binding);
     void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding,
                                Id type, char type_char, u32 element_size);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 50c0f72437..b8978b94a4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -54,6 +54,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
         return arg.U32();
     } else if constexpr (std::is_same_v<ArgType, IR::Block*>) {
         return arg.Label();
+    } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+        return arg.Attribute();
     }
 }
 
@@ -197,8 +199,9 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) {
 }
 } // Anonymous namespace
 
-std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) {
-    EmitContext ctx{profile, program};
+std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program,
+                           u32& binding) {
+    EmitContext ctx{profile, program, binding};
     const Id void_function{ctx.TypeFunction(ctx.void_id)};
     const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
     for (IR::Block* const block : program.blocks) {
@@ -208,28 +211,41 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program
         }
     }
     ctx.OpFunctionEnd();
-    boost::container::small_vector<Id, 32> interfaces;
-    const Info& info{program.info};
-    if (info.uses_workgroup_id) {
-        interfaces.push_back(ctx.workgroup_id);
+
+    const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
+    spv::ExecutionModel execution_model{};
+    switch (env.ShaderStage()) {
+    case Shader::Stage::Compute: {
+        const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
+        execution_model = spv::ExecutionModel::GLCompute;
+        ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0],
+                             workgroup_size[1], workgroup_size[2]);
+        break;
     }
-    if (info.uses_local_invocation_id) {
-        interfaces.push_back(ctx.local_invocation_id);
+    case Shader::Stage::VertexB:
+        execution_model = spv::ExecutionModel::Vertex;
+        break;
+    case Shader::Stage::Fragment:
+        execution_model = spv::ExecutionModel::Fragment;
+        ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft);
+        break;
+    default:
+        throw NotImplementedException("Stage {}", env.ShaderStage());
     }
-    const std::span interfaces_span(interfaces.data(), interfaces.size());
-    ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span);
-
-    const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
-    ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1],
-                         workgroup_size[2]);
+    ctx.AddEntryPoint(execution_model, func, "main", interfaces);
 
     SetupDenormControl(profile, program, ctx, func);
+    const Info& info{program.info};
     if (info.uses_sampled_1d) {
         ctx.AddCapability(spv::Capability::Sampled1D);
     }
     if (info.uses_sparse_residency) {
         ctx.AddCapability(spv::Capability::SparseResidency);
     }
+    if (info.uses_demote_to_helper_invocation) {
+        ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
+        ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
+    }
     // TODO: Track this usage
     ctx.AddCapability(spv::Capability::ImageGatherExtended);
 
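
Note: passing `binding` by reference lets every stage of one pipeline share a single
descriptor-binding counter, so constant buffer and texture bindings never collide across
stages. A hypothetical caller (names are illustrative, not part of this patch):

    u32 binding{0};
    const std::vector<u32> vert_spv{EmitSPIRV(profile, vert_env, vert_program, binding)};
    const std::vector<u32> frag_spv{EmitSPIRV(profile, frag_env, frag_program, binding)};
    // binding now holds the first free binding index after both stages
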
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 89566c83d0..ae121f5344 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -16,18 +16,18 @@
 namespace Shader::Backend::SPIRV {
 
 [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env,
-                                         IR::Program& program);
+                                         IR::Program& program, u32& binding);
 
 // Microinstruction emitters
 Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
 void EmitVoid(EmitContext& ctx);
 Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
-void EmitBranch(EmitContext& ctx, IR::Block* label);
-void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
-                           IR::Block* false_label);
-void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label);
-void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label);
+void EmitBranch(EmitContext& ctx, Id label);
+void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label);
+void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
+void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
 void EmitReturn(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
 void EmitGetRegister(EmitContext& ctx);
 void EmitSetRegister(EmitContext& ctx);
 void EmitGetPred(EmitContext& ctx);
@@ -41,10 +41,12 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
-void EmitGetAttribute(EmitContext& ctx);
-void EmitSetAttribute(EmitContext& ctx);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value);
 void EmitGetAttributeIndexed(EmitContext& ctx);
 void EmitSetAttributeIndexed(EmitContext& ctx);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetFragDepth(EmitContext& ctx, Id value);
 void EmitGetZFlag(EmitContext& ctx);
 void EmitGetSFlag(EmitContext& ctx);
 void EmitGetCFlag(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 125b58cf74..02d1157403 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -5,6 +5,43 @@
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
 
 namespace Shader::Backend::SPIRV {
+namespace {
+Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
+    const u32 element{static_cast<u32>(attr) % 4};
+    const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
+    if (IR::IsGeneric(attr)) {
+        const u32 index{IR::GenericAttributeIndex(attr)};
+        return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id());
+    }
+    switch (attr) {
+    case IR::Attribute::PositionX:
+    case IR::Attribute::PositionY:
+    case IR::Attribute::PositionZ:
+    case IR::Attribute::PositionW:
+        return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id());
+    default:
+        throw NotImplementedException("Read attribute {}", attr);
+    }
+}
+
+Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
+    const u32 element{static_cast<u32>(attr) % 4};
+    const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
+    if (IR::IsGeneric(attr)) {
+        const u32 index{IR::GenericAttributeIndex(attr)};
+        return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id());
+    }
+    switch (attr) {
+    case IR::Attribute::PositionX:
+    case IR::Attribute::PositionY:
+    case IR::Attribute::PositionZ:
+    case IR::Attribute::PositionW:
+        return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id());
+    default:
+        throw NotImplementedException("Read attribute {}", attr);
+    }
+}
+} // Anonymous namespace
 
 void EmitGetRegister(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
@@ -87,12 +124,12 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
     return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset);
 }
 
-void EmitGetAttribute(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
+    return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr));
 }
 
-void EmitSetAttribute(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) {
+    ctx.OpStore(OutputAttrPointer(ctx, attr), value);
 }
 
 void EmitGetAttributeIndexed(EmitContext&) {
@@ -103,6 +140,16 @@ void EmitSetAttributeIndexed(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
+    const Id component_id{ctx.Constant(ctx.U32[1], component)};
+    const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
+    ctx.OpStore(pointer, value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, Id value) {
+    ctx.OpStore(ctx.frag_depth, value);
+}
+
 void EmitGetZFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
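
Note: InputAttrPointer and OutputAttrPointer rely on the attribute enum packing four 32-bit
components per generic attribute: `attr % 4` selects the component and GenericAttributeIndex
selects the vector. A worked example (sketch):

    // attr = Generic2Y:
    //   element = static_cast<u32>(attr) % 4        -> 1 (the Y component)
    //   index   = IR::GenericAttributeIndex(attr)   -> 2
    //   pointer = OpAccessChain(input_f32, input_generics[2], Constant(U32[1], 1))
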
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 48755b8278..6b81f01694 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -6,26 +6,29 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitBranch(EmitContext& ctx, IR::Block* label) {
-    ctx.OpBranch(label->Definition<Id>());
+void EmitBranch(EmitContext& ctx, Id label) {
+    ctx.OpBranch(label);
 }
 
-void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
-                           IR::Block* false_label) {
-    ctx.OpBranchConditional(condition, true_label->Definition<Id>(), false_label->Definition<Id>());
+void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) {
+    ctx.OpBranchConditional(condition, true_label, false_label);
 }
 
-void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) {
-    ctx.OpLoopMerge(merge_label->Definition<Id>(), continue_label->Definition<Id>(),
-                    spv::LoopControlMask::MaskNone);
+void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) {
+    ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
 }
 
-void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) {
-    ctx.OpSelectionMerge(merge_label->Definition<Id>(), spv::SelectionControlMask::MaskNone);
+void EmitSelectionMerge(EmitContext& ctx, Id merge_label) {
+    ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
 }
 
 void EmitReturn(EmitContext& ctx) {
     ctx.OpReturn();
 }
 
+void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) {
+    ctx.OpDemoteToHelperInvocationEXT();
+    ctx.OpBranch(continue_label);
+}
+
 } // namespace Shader::Backend::SPIRV
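
Note: unlike OpKill, OpDemoteToHelperInvocationEXT does not terminate the invocation, which
is why EmitDemoteToHelperInvocation ends with an explicit OpBranch to the continue label:
execution may resume there as a helper invocation, and the branch keeps the CFG structured.
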
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0fcb68050d..1fcaa56dda 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -3,6 +3,8 @@
 #include <array>
 
 #include "common/common_types.h"
+#include "shader_recompiler/stage.h"
+#include "shader_recompiler/program_header.h"
 
 namespace Shader {
 
@@ -15,6 +17,18 @@ public:
     [[nodiscard]] virtual u32 TextureBoundBuffer() = 0;
 
     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0;
+
+    [[nodiscard]] const ProgramHeader& SPH() const noexcept {
+        return sph;
+    }
+
+    [[nodiscard]] Stage ShaderStage() const noexcept {
+        return stage;
+    }
+
+protected:
+    ProgramHeader sph{};
+    Stage stage{};
 };
 
 } // namespace Shader
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
index 2fb7d576ff..4811242ea0 100644
--- a/src/shader_recompiler/frontend/ir/attribute.cpp
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept {
     return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X;
 }
 
-int GenericAttributeIndex(Attribute attribute) {
+u32 GenericAttributeIndex(Attribute attribute) {
     if (!IsGeneric(attribute)) {
         throw InvalidArgument("Attribute is not generic {}", attribute);
     }
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
index bb2cad6afd..34ec7e0cd0 100644
--- a/src/shader_recompiler/frontend/ir/attribute.h
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -224,7 +224,7 @@ enum class Attribute : u64 {
 
 [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
 
-[[nodiscard]] int GenericAttributeIndex(Attribute attribute);
+[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
 
 [[nodiscard]] std::string NameOf(Attribute attribute);
 
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 958282160d..672836c0b7 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -82,6 +82,12 @@ void IREmitter::Return() {
     Inst(Opcode::Return);
 }
 
+void IREmitter::DemoteToHelperInvocation(Block* continue_label) {
+    block->SetBranch(continue_label);
+    continue_label->AddImmediatePredecessor(block);
+    Inst(Opcode::DemoteToHelperInvocation, continue_label);
+}
+
 U32 IREmitter::GetReg(IR::Reg reg) {
     return Inst<U32>(Opcode::GetRegister, reg);
 }
@@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
     Inst(Opcode::SetAttribute, attribute, value);
 }
 
+void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
+    Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
+}
+
+void IREmitter::SetFragDepth(const F32& value) {
+    Inst(Opcode::SetFragDepth, value);
+}
+
 U32 IREmitter::WorkgroupIdX() {
     return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
 }
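
Note: SetFragColor encodes the render target index and component as U32 immediates ahead of
the value, matching the new SetFragColor opcode. Usage sketch:

    // Write 1.0f to component W (alpha) of render target 0:
    ir.SetFragColor(0, 3, ir.Imm32(1.0f));
    // emits: SetFragColor Imm32(0), Imm32(3), value
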
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 05263fe8b4..72af5db377 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -36,6 +36,7 @@ public:
     void LoopMerge(Block* merge_block, Block* continue_target);
     void SelectionMerge(Block* merge_block);
     void Return();
+    void DemoteToHelperInvocation(Block* continue_label);
 
     [[nodiscard]] U32 GetReg(IR::Reg reg);
     void SetReg(IR::Reg reg, const U32& value);
@@ -67,6 +68,9 @@ public:
     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
     void SetAttribute(IR::Attribute attribute, const F32& value);
 
+    void SetFragColor(u32 index, u32 component, const F32& value);
+    void SetFragDepth(const F32& value);
+
     [[nodiscard]] U32 WorkgroupIdX();
     [[nodiscard]] U32 WorkgroupIdY();
     [[nodiscard]] U32 WorkgroupIdZ();
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 5946105d2f..21b7d8a9f4 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::LoopMerge:
     case Opcode::SelectionMerge:
     case Opcode::Return:
+    case Opcode::DemoteToHelperInvocation:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
+    case Opcode::SetFragColor:
+    case Opcode::SetFragDepth:
     case Opcode::WriteGlobalU8:
     case Opcode::WriteGlobalS8:
     case Opcode::WriteGlobalU16:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 9052a4903d..593faca528 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -13,6 +13,7 @@ OPCODE(BranchConditional,                                   Void,           U1,
 OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          )
 OPCODE(SelectionMerge,                                      Void,           Label,                                                                          )
 OPCODE(Return,                                              Void,                                                                                           )
+OPCODE(DemoteToHelperInvocation,                            Void,           Label,                                                                          )
 
 // Context getters/setters
 OPCODE(GetRegister,                                         U32,            Reg,                                                                            )
@@ -28,10 +29,12 @@ OPCODE(GetCbufS16,                                          U32,            U32,
 OPCODE(GetCbufU32,                                          U32,            U32,            U32,                                                            )
 OPCODE(GetCbufF32,                                          F32,            U32,            U32,                                                            )
 OPCODE(GetCbufU64,                                          U64,            U32,            U32,                                                            )
-OPCODE(GetAttribute,                                        U32,            Attribute,                                                                      )
-OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                                            )
-OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                                            )
-OPCODE(SetAttributeIndexed,                                 Void,           U32,            U32,                                                            )
+OPCODE(GetAttribute,                                        F32,            Attribute,                                                                      )
+OPCODE(SetAttribute,                                        Void,           Attribute,      F32,                                                            )
+OPCODE(GetAttributeIndexed,                                 F32,            U32,                                                                            )
+OPCODE(SetAttributeIndexed,                                 Void,           U32,            F32,                                                            )
+OPCODE(SetFragColor,                                        Void,           U32,            U32,            F32,                                            )
+OPCODE(SetFragDepth,                                        Void,           F32,                                                                            )
 OPCODE(GetZFlag,                                            U1,             Void,                                                                           )
 OPCODE(GetSFlag,                                            U1,             Void,                                                                           )
 OPCODE(GetCFlag,                                            U1,             Void,                                                                           )
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index bce8b19b3a..733513c8b1 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -10,6 +10,7 @@
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/shader_info.h"
+#include "shader_recompiler/stage.h"
 
 namespace Shader::IR {
 
@@ -17,6 +18,7 @@ struct Program {
     BlockList blocks;
     BlockList post_order_blocks;
     Info info;
+    Stage stage{};
 };
 
 [[nodiscard]] std::string DumpProgram(const Program& program);
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
index 8fea05f7be..3845ec5fb2 100644
--- a/src/shader_recompiler/frontend/ir/reg.h
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256;
     return reg + (-num);
 }
 
-[[nodiscard]] constexpr Reg operator++(Reg& reg) {
+constexpr Reg operator++(Reg& reg) {
     reg = reg + 1;
     return reg;
 }
 
-[[nodiscard]] constexpr Reg operator++(Reg& reg, int) {
+constexpr Reg operator++(Reg& reg, int) {
     const Reg copy{reg};
     reg = reg + 1;
     return copy;
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index 715c0e92d8..4f6707fae4 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) {
     case Opcode::EXIT:
     case Opcode::JMP:
     case Opcode::JMX:
+    case Opcode::KIL:
     case Opcode::BRK:
     case Opcode::CONT:
     case Opcode::LONGJMP:
@@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         block->end = pc;
         return AnalysisState::Branch;
     }
+    case Opcode::KIL: {
+        const Predicate pred{inst.Pred()};
+        const auto ir_pred{static_cast<IR::Pred>(pred.index)};
+        const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
+        AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
+        return AnalysisState::Branch;
+    }
     case Opcode::PBK:
     case Opcode::PCNT:
     case Opcode::PEXIT:
@@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         return AnalysisState::Continue;
     }
     const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
-    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true);
+    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
     return AnalysisState::Branch;
 }
 
 void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
-                          EndClass insn_end_class, IR::Condition cond,
-                          bool visit_conditional_inst) {
+                          EndClass insn_end_class, IR::Condition cond) {
     if (block->begin != pc) {
         // If the block doesn't start in the conditional instruction
         // mark it as a label to visit it later
@@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
     // Impersonate the visited block with a virtual block
     *block = std::move(virtual_block);
     // Set the end properties of the conditional instruction
-    conditional_block->end = visit_conditional_inst ? (pc + 1) : pc;
+    conditional_block->end = pc + 1;
     conditional_block->end_class = insn_end_class;
     // Add a label to the instruction after the conditional instruction
     Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
     // Branch to the next instruction from the virtual block
     block->branch_false = endif_block;
-    // And branch to it from the conditional instruction if it is a branch
-    if (insn_end_class == EndClass::Branch) {
+    // And branch to it from the conditional instruction if it is a branch or a kill instruction.
+    // Kill instructions are treated as branches because they only demote to a helper invocation,
+    // after which execution may continue.
+    if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
         conditional_block->cond = IR::Condition{true};
         conditional_block->branch_true = endif_block;
         conditional_block->branch_false = nullptr;
@@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati
             throw NotImplementedException("Conditional EXIT with PEXIT token");
         }
         const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
-        AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false);
+        AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
         return AnalysisState::Branch;
     }
     if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
@@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati
         block->branch_false = nullptr;
         return AnalysisState::Branch;
     }
-    block->end = pc;
+    block->end = pc + 1;
     block->end_class = EndClass::Exit;
     return AnalysisState::Branch;
 }
@@ -505,6 +514,12 @@ std::string CFG::Dot() const {
                                    node_uid);
                 ++node_uid;
                 break;
+            case EndClass::Kill:
+                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+                dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
+                                   node_uid);
+                ++node_uid;
+                break;
             }
         }
         if (function.entrypoint == 8) {
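
Note: treating EndClass::Kill as a branch gives a predicated KIL the same shape as a
conditional jump, except the kill block falls through unconditionally once the demote has
executed. Sketch of the CFG produced for a KIL at address pc:

    virtual block --cond--> [kill block, end_class = Kill, end = pc + 1]
         |  branch_false            |  branch_true (Condition{true})
         v                          v
         endif block at pc + 1 <----+
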
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
index fe74f210fb..22f1341944 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -29,6 +29,7 @@ enum class EndClass {
     Call,
     Exit,
     Return,
+    Kill,
 };
 
 enum class Token {
@@ -130,7 +131,7 @@ private:
     AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
 
     void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
-                         IR::Condition cond, bool visit_conditional_inst);
+                         IR::Condition cond);
 
     /// Return true when the branch instruction is confirmed to be a branch
     bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 8bfa643268..0074eb89b6 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     IR::Program program;
     program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
     program.post_order_blocks = PostOrder(program.blocks);
+    program.stage = env.ShaderStage();
     RemoveUnreachableBlocks(program);
 
     // Replace instructions before the SSA rewrite
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 5f5d9cf173..cec03e73ee 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -45,6 +45,7 @@ enum class StatementType {
     Loop,
     Break,
     Return,
+    Kill,
     Function,
     Identity,
     Not,
@@ -70,6 +71,7 @@ struct If {};
 struct Loop {};
 struct Break {};
 struct Return {};
+struct Kill {};
 struct FunctionTag {};
 struct Identity {};
 struct Not {};
@@ -93,6 +95,7 @@ struct Statement : ListBaseHook {
     Statement(Break, Statement* cond_, Statement* up_)
         : cond{cond_}, up{up_}, type{StatementType::Break} {}
     Statement(Return) : type{StatementType::Return} {}
+    Statement(Kill) : type{StatementType::Kill} {}
     Statement(FunctionTag) : children{}, type{StatementType::Function} {}
     Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {}
     Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {}
@@ -174,6 +177,9 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) {
         case StatementType::Return:
             ret += fmt::format("{}    return;\n", indent);
             break;
+        case StatementType::Kill:
+            ret += fmt::format("{}    kill;\n", indent);
+            break;
         case StatementType::SetVariable:
             ret += fmt::format("{}    goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
             break;
@@ -424,6 +430,9 @@ private:
                 gotos.push_back(root.insert(ip, *goto_stmt));
                 break;
             }
+            case Flow::EndClass::Kill:
+                root.insert(ip, *pool.Create(Kill{}));
+                break;
             }
         }
     }
@@ -729,6 +738,15 @@ private:
                 current_block = nullptr;
                 break;
             }
+            case StatementType::Kill: {
+                if (!current_block) {
+                    current_block = block_pool.Create(inst_pool);
+                    block_list.push_back(current_block);
+                }
+                IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block);
+                current_block = nullptr;
+                break;
+            }
             default:
                 throw NotImplementedException("Statement type {}", stmt.type);
             }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
deleted file mode 100644
index e98bbd0d18..0000000000
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/common_types.h"
-#include "shader_recompiler/exception.h"
-#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
-
-namespace Shader::Maxwell {
-
-void TranslatorVisitor::EXIT(u64) {
-    ir.Exit();
-}
-
-} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 0000000000..ea9b33da9b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ExitFragment(TranslatorVisitor& v) {
+    const ProgramHeader sph{v.env.SPH()};
+    IR::Reg src_reg{IR::Reg::R0};
+    for (u32 render_target = 0; render_target < 8; ++render_target) {
+        const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
+        for (u32 component = 0; component < 4; ++component) {
+            if (!mask[component]) {
+                continue;
+            }
+            v.ir.SetFragColor(render_target, component, v.F(src_reg));
+            ++src_reg;
+        }
+    }
+    if (sph.ps.omap.sample_mask != 0) {
+        throw NotImplementedException("Sample mask");
+    }
+    if (sph.ps.omap.depth != 0) {
+        throw NotImplementedException("Fragment depth");
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::EXIT() {
+    switch (env.ShaderStage()) {
+    case Stage::Fragment:
+        ExitFragment(*this);
+        break;
+    default:
+        break;
+    }
+}
+
+} // namespace Shader::Maxwell
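
Note: ExitFragment consumes source registers in order across all enabled components, so the
register a component reads from depends on every mask before it. Worked example (a sketch):
if render targets 0 and 1 both enable RGBA, the EXIT expands to

    SetFragColor(0, 0, F(R0)) ... SetFragColor(0, 3, F(R3))
    SetFragColor(1, 0, F(R4)) ... SetFragColor(1, 3, F(R7))
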
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index e3e298c3b6..ed81d9c369 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -108,7 +108,7 @@ public:
     void DSETP_reg(u64 insn);
     void DSETP_cbuf(u64 insn);
     void DSETP_imm(u64 insn);
-    void EXIT(u64 insn);
+    void EXIT();
     void F2F_reg(u64 insn);
     void F2F_cbuf(u64 insn);
     void F2F_imm(u64 insn);
@@ -220,7 +220,7 @@ public:
     void JCAL(u64 insn);
     void JMP(u64 insn);
     void JMX(u64 insn);
-    void KIL(u64 insn);
+    void KIL();
     void LD(u64 insn);
     void LDC(u64 insn);
     void LDG(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
index ad97786d4e..2922145eec 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -11,6 +11,13 @@
 
 namespace Shader::Maxwell {
 namespace {
+enum class Size : u64 {
+    B32,
+    B64,
+    B96,
+    B128,
+};
+
 enum class InterpolationMode : u64 {
     Pass,
     Multiply,
@@ -23,8 +30,85 @@ enum class SampleMode : u64 {
     Centroid,
     Offset,
 };
+
+int NumElements(Size size) {
+    switch (size) {
+    case Size::B32:
+        return 1;
+    case Size::B64:
+        return 2;
+    case Size::B96:
+        return 3;
+    case Size::B128:
+        return 4;
+    }
+    throw InvalidArgument("Invalid size {}", size);
+}
 } // Anonymous namespace
 
+void TranslatorVisitor::ALD(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> index_reg;
+        BitField<20, 10, u64> absolute_offset;
+        BitField<20, 11, s64> relative_offset;
+        BitField<39, 8, IR::Reg> stream_reg;
+        BitField<32, 1, u64> o;
+        BitField<31, 1, u64> patch;
+        BitField<47, 2, Size> size;
+    } const ald{insn};
+
+    if (ald.o != 0) {
+        throw NotImplementedException("O");
+    }
+    if (ald.patch != 0) {
+        throw NotImplementedException("P");
+    }
+    if (ald.index_reg != IR::Reg::RZ) {
+        throw NotImplementedException("Indexed");
+    }
+    const u64 offset{ald.absolute_offset.Value()};
+    if (offset % 4 != 0) {
+        throw NotImplementedException("Unaligned absolute offset {}", offset);
+    }
+    const int num_elements{NumElements(ald.size)};
+    for (int element = 0; element < num_elements; ++element) {
+        F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element}));
+    }
+}
+
+void TranslatorVisitor::AST(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> src_reg;
+        BitField<8, 8, IR::Reg> index_reg;
+        BitField<20, 10, u64> absolute_offset;
+        BitField<20, 11, s64> relative_offset;
+        BitField<31, 1, u64> patch;
+        BitField<39, 8, IR::Reg> stream_reg;
+        BitField<47, 2, Size> size;
+    } const ast{insn};
+
+    if (ast.patch != 0) {
+        throw NotImplementedException("P");
+    }
+    if (ast.stream_reg != IR::Reg::RZ) {
+        throw NotImplementedException("Stream store");
+    }
+    if (ast.index_reg != IR::Reg::RZ) {
+        throw NotImplementedException("Indexed store");
+    }
+    const u64 offset{ast.absolute_offset.Value()};
+    if (offset % 4 != 0) {
+        throw NotImplementedException("Unaligned absolute offset {}", offset);
+    }
+    const int num_elements{NumElements(ast.size)};
+    for (int element = 0; element < num_elements; ++element) {
+        ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element));
+    }
+}
+
 void TranslatorVisitor::IPA(u64 insn) {
     // IPA is the instruction used to read varyings from a fragment shader.
     // gl_FragCoord is mapped to the gl_Position attribute.
@@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) {
     // }
     const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
     if (is_indexed) {
-        throw NotImplementedException("IPA.IDX");
+        throw NotImplementedException("IDX");
     }
 
     const IR::Attribute attribute{ipa.attribute};
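
Note: ALD and AST address attribute memory in bytes while the IR addresses it in 32-bit
words, hence the offset / 4 conversion and the alignment check. Expansion sketch: ALD.B128
with absolute_offset = 0x70 and dest_reg = R4 becomes

    R4 = GetAttribute(Attribute{0x1c})   // 0x70 / 4
    R5 = GetAttribute(Attribute{0x1d})
    R6 = GetAttribute(Attribute{0x1e})
    R7 = GetAttribute(Attribute{0x1f})
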
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 9675cef541..59252bcc50 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) {
     ThrowNotImplemented(Opcode::AL2P);
 }
 
-void TranslatorVisitor::ALD(u64) {
-    ThrowNotImplemented(Opcode::ALD);
-}
-
-void TranslatorVisitor::AST(u64) {
-    ThrowNotImplemented(Opcode::AST);
-}
-
 void TranslatorVisitor::ATOM_cas(u64) {
     ThrowNotImplemented(Opcode::ATOM_cas);
 }
@@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) {
     ThrowNotImplemented(Opcode::DSETP_imm);
 }
 
-void TranslatorVisitor::EXIT(u64) {
-    throw LogicError("Visting EXIT instruction");
-}
-
 void TranslatorVisitor::F2F_reg(u64) {
     ThrowNotImplemented(Opcode::F2F_reg);
 }
@@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) {
     ThrowNotImplemented(Opcode::JMX);
 }
 
-void TranslatorVisitor::KIL(u64) {
-    ThrowNotImplemented(Opcode::KIL);
+void TranslatorVisitor::KIL() {
+    // KIL is a no-op here; control flow analysis already lowered it to EndClass::Kill
+}
 
 void TranslatorVisitor::LD(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
index 98d9f4c648..0fbb87ec43 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) {
         BitField<36, 13, u64> cbuf_offset;
     } const tex{insn};
 
-    Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset));
+    Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
 }
 
 void TranslatorVisitor::TEX_b(u64 insn) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
index ac1615b007..54f0df7547 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
 
 IR::Value Sample(TranslatorVisitor& v, u64 insn) {
     const Encoding texs{insn};
-    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset))};
+    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
     const IR::F32 zero{v.ir.Imm32(0.0f)};
     const IR::Reg reg_a{texs.src_reg_a};
     const IR::Reg reg_b{texs.src_reg_b};
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 708b6b2672..fbbe286322 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
         return;
     }
     info.constant_buffer_mask |= 1U << index;
-    info.constant_buffer_descriptors.push_back({
-        .index{index},
-        .count{1},
-    });
+
+    auto& cbufs{info.constant_buffer_descriptors};
+    cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
+                 ConstantBufferDescriptor{
+                     .index{index},
+                     .count{1},
+                 });
+}
+
+void GetAttribute(Info& info, IR::Attribute attribute) {
+    if (IR::IsGeneric(attribute)) {
+        info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true;
+        return;
+    }
+    switch (attribute) {
+    case IR::Attribute::PositionX:
+    case IR::Attribute::PositionY:
+    case IR::Attribute::PositionZ:
+    case IR::Attribute::PositionW:
+        info.loads_position = true;
+        break;
+    default:
+        throw NotImplementedException("Get attribute {}", attribute);
+    }
+}
+
+void SetAttribute(Info& info, IR::Attribute attribute) {
+    if (IR::IsGeneric(attribute)) {
+        info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true;
+        return;
+    }
+    switch (attribute) {
+    case IR::Attribute::PositionX:
+    case IR::Attribute::PositionY:
+    case IR::Attribute::PositionZ:
+    case IR::Attribute::PositionW:
+        info.stores_position = true;
+        break;
+    default:
+        throw NotImplementedException("Set attribute {}", attribute);
+    }
 }
 
 void VisitUsages(Info& info, IR::Inst& inst) {
@@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) {
         break;
     }
     switch (inst.Opcode()) {
+    case IR::Opcode::DemoteToHelperInvocation:
+        info.uses_demote_to_helper_invocation = true;
+        break;
+    case IR::Opcode::GetAttribute:
+        GetAttribute(info, inst.Arg(0).Attribute());
+        break;
+    case IR::Opcode::SetAttribute:
+        SetAttribute(info, inst.Arg(0).Attribute());
+        break;
+    case IR::Opcode::SetFragColor:
+        info.stores_frag_color[inst.Arg(0).U32()] = true;
+        break;
+    case IR::Opcode::SetFragDepth:
+        info.stores_frag_depth = true;
+        break;
     case IR::Opcode::WorkgroupId:
         info.uses_workgroup_id = true;
         break;
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index d09bcec366..bab7ca1868 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -169,7 +169,7 @@ private:
         const size_t num_args{phi.NumArgs()};
         for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
             const IR::Value& op{phi.Arg(arg_index)};
-            if (op == same || op == IR::Value{&phi}) {
+            if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
                 // Unique value or self-reference
                 continue;
             }
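
Note: the Resolve() change matters because the pass itself introduces Identity instructions
while rewriting phis. Plain equality would see phi(%a, %id) with %id = Identity(%a) as having
two distinct operands and keep the phi; comparing resolved values follows the identity chain
to %a on both sides, so the phi is still recognized as trivial and removed.
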
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h
new file mode 100644
index 0000000000..1544bfa427
--- /dev/null
+++ b/src/shader_recompiler/program_header.h
@@ -0,0 +1,143 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Shader {
+
+enum class OutputTopology : u32 {
+    PointList = 1,
+    LineStrip = 6,
+    TriangleStrip = 7,
+};
+
+enum class PixelImap : u8 {
+    Unused = 0,
+    Constant = 1,
+    Perspective = 2,
+    ScreenLinear = 3,
+};
+
+// Documentation in:
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
+struct ProgramHeader {
+    union {
+        BitField<0, 5, u32> sph_type;
+        BitField<5, 5, u32> version;
+        BitField<10, 4, u32> shader_type;
+        BitField<14, 1, u32> mrt_enable;
+        BitField<15, 1, u32> kills_pixels;
+        BitField<16, 1, u32> does_global_store;
+        BitField<17, 4, u32> sass_version;
+        BitField<21, 5, u32> reserved;
+        BitField<26, 1, u32> does_load_or_store;
+        BitField<27, 1, u32> does_fp64;
+        BitField<28, 4, u32> stream_out_mask;
+    } common0;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_low_size;
+        BitField<24, 8, u32> per_patch_attribute_count;
+    } common1;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_high_size;
+        BitField<24, 8, u32> threads_per_input_primitive;
+    } common2;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_crs_size;
+        BitField<24, 4, OutputTopology> output_topology;
+        BitField<28, 4, u32> reserved;
+    } common3;
+
+    union {
+        BitField<0, 12, u32> max_output_vertices;
+        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
+        BitField<20, 4, u32> reserved;
+        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+    } common4;
+
+    union {
+        struct {
+            INSERT_PADDING_BYTES_NOINIT(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES_NOINIT(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
+            union {
+                BitField<0, 8, u16> clip_distances;
+                BitField<8, 1, u16> point_sprite_s;
+                BitField<9, 1, u16> point_sprite_t;
+                BitField<10, 1, u16> fog_coordinate;
+                BitField<12, 1, u16> tessellation_eval_point_u;
+                BitField<13, 1, u16> tessellation_eval_point_v;
+                BitField<14, 1, u16> instance_id;
+                BitField<15, 1, u16> vertex_id;
+            };
+            INSERT_PADDING_BYTES_NOINIT(5);  // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES_NOINIT(1);  // ImapReserved
+            INSERT_PADDING_BYTES_NOINIT(3);  // OmapSystemValuesA
+            INSERT_PADDING_BYTES_NOINIT(1);  // OmapSystemValuesB
+            INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
+            INSERT_PADDING_BYTES_NOINIT(2);  // OmapColor
+            INSERT_PADDING_BYTES_NOINIT(2);  // OmapSystemValuesC
+            INSERT_PADDING_BYTES_NOINIT(5);  // OmapFixedFncTexture[10]
+            INSERT_PADDING_BYTES_NOINIT(1);  // OmapReserved
+        } vtg;
+
+        struct {
+            INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
+            INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
+
+            union {
+                BitField<0, 2, PixelImap> x;
+                BitField<2, 2, PixelImap> y;
+                BitField<4, 2, PixelImap> z;
+                BitField<6, 2, PixelImap> w;
+                u8 raw;
+            } imap_generic_vector[32];
+
+            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
+            INSERT_PADDING_BYTES_NOINIT(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES_NOINIT(2);  // ImapReserved
+
+            struct {
+                u32 target;
+                union {
+                    BitField<0, 1, u32> sample_mask;
+                    BitField<1, 1, u32> depth;
+                    BitField<2, 30, u32> reserved;
+                };
+            } omap;
+
+            [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept {
+                const u32 bits{omap.target >> (rt * 4)};
+                return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
+            }
+
+            [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const {
+                const auto& vector{imap_generic_vector[attribute]};
+                return {vector.x, vector.y, vector.z, vector.w};
+            }
+        } ps;
+
+        std::array<u32, 0xf> raw;
+    };
+
+    [[nodiscard]] u64 LocalMemorySize() const noexcept {
+        return (common1.shader_local_memory_low_size |
+                (common2.shader_local_memory_high_size << 24));
+    }
+};
+static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size");
+
+} // namespace Shader
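
Note: omap.target packs one 4-bit RGBA mask per render target, lowest nibble first. Worked
example for EnabledOutputComponents (a sketch):

    omap.target == 0x000000F3
    RT0 nibble = 0x3 -> EnabledOutputComponents(0) == {true, true, false, false}
    RT1 nibble = 0xF -> EnabledOutputComponents(1) == {true, true, true, true}
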
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
deleted file mode 100644
index 527e19c272..0000000000
--- a/src/shader_recompiler/recompiler.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <vector>
-
-#include "common/common_types.h"
-#include "shader_recompiler/backend/spirv/emit_spirv.h"
-#include "shader_recompiler/environment.h"
-#include "shader_recompiler/frontend/maxwell/control_flow.h"
-#include "shader_recompiler/frontend/maxwell/program.h"
-#include "shader_recompiler/object_pool.h"
-#include "shader_recompiler/recompiler.h"
-
-namespace Shader {
-
-std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env,
-                                                 u32 start_address) {
-    ObjectPool<Maxwell::Flow::Block> flow_block_pool;
-    ObjectPool<IR::Inst> inst_pool;
-    ObjectPool<IR::Block> block_pool;
-
-    Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
-    IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
-    return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)};
-}
-
-} // namespace Shader
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h
deleted file mode 100644
index 2529463aec..0000000000
--- a/src/shader_recompiler/recompiler.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "shader_recompiler/environment.h"
-#include "shader_recompiler/profile.h"
-#include "shader_recompiler/shader_info.h"
-
-namespace Shader {
-
-[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile,
-                                                               Environment& env, u32 start_address);
-
-} // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index adc1d9a64a..6eff762e2c 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -56,6 +56,15 @@ struct Info {
 
     bool uses_workgroup_id{};
     bool uses_local_invocation_id{};
+
+    std::array<bool, 32> loads_generics{};
+    bool loads_position{};
+
+    std::array<bool, 8> stores_frag_color{};
+    bool stores_frag_depth{};
+    std::array<bool, 32> stores_generics{};
+    bool stores_position{};
+
     bool uses_fp16{};
     bool uses_fp64{};
     bool uses_fp16_denorms_flush{};
@@ -68,6 +77,7 @@ struct Info {
     bool uses_image_1d{};
     bool uses_sampled_1d{};
     bool uses_sparse_residency{};
+    bool uses_demote_to_helper_invocation{};
 
     IR::Type used_constant_buffer_types{};
 
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h
new file mode 100644
index 0000000000..fc6ce60435
--- /dev/null
+++ b/src/shader_recompiler/stage.h
@@ -0,0 +1,19 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Shader {
+
+enum class Stage {
+    Compute,
+    VertexA,
+    VertexB,
+    TessellationControl,
+    TessellationEval,
+    Geometry,
+    Fragment,
+};
+
+} // namespace Shader
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3323e69169..71b07c1940 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -100,6 +100,7 @@ add_library(video_core STATIC
     renderer_vulkan/fixed_pipeline_state.h
     renderer_vulkan/maxwell_to_vk.cpp
     renderer_vulkan/maxwell_to_vk.h
+    renderer_vulkan/pipeline_helper.h
     renderer_vulkan/renderer_vulkan.h
     renderer_vulkan/renderer_vulkan.cpp
     renderer_vulkan/vk_blit_screen.cpp
@@ -116,15 +117,18 @@ add_library(video_core STATIC
     renderer_vulkan/vk_descriptor_pool.h
     renderer_vulkan/vk_fence_manager.cpp
     renderer_vulkan/vk_fence_manager.h
+    renderer_vulkan/vk_graphics_pipeline.cpp
+    renderer_vulkan/vk_graphics_pipeline.h
     renderer_vulkan/vk_master_semaphore.cpp
     renderer_vulkan/vk_master_semaphore.h
     renderer_vulkan/vk_pipeline_cache.cpp
     renderer_vulkan/vk_pipeline_cache.h
-    renderer_vulkan/vk_pipeline.h
     renderer_vulkan/vk_query_cache.cpp
     renderer_vulkan/vk_query_cache.h
     renderer_vulkan/vk_rasterizer.cpp
     renderer_vulkan/vk_rasterizer.h
+    renderer_vulkan/vk_render_pass_cache.cpp
+    renderer_vulkan/vk_render_pass_cache.h
     renderer_vulkan/vk_resource_pool.cpp
     renderer_vulkan/vk_resource_pool.h
     renderer_vulkan/vk_scheduler.cpp
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 362278f015..d8f6839072 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
         regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
     alpha_test_func.Assign(PackComparisonOp(test_func));
     early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
+    depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
+    depth_format.Assign(static_cast<u32>(regs.zeta.format));
+    std::ranges::transform(regs.rt, color_formats.begin(),
+                           [](const auto& rt) { return static_cast<u8>(rt.format); });
 
     alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
     point_size = Common::BitCast<u32>(regs.point_size);
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index a0eb83a68d..348f1d6ce6 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -60,7 +60,7 @@ struct FixedPipelineState {
 
         void Refresh(const Maxwell& regs, size_t index);
 
-        constexpr std::array<bool, 4> Mask() const noexcept {
+        std::array<bool, 4> Mask() const noexcept {
             return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
         }
 
@@ -97,11 +97,11 @@ struct FixedPipelineState {
         BitField<20, 3, u32> type;
         BitField<23, 6, u32> size;
 
-        constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
+        Maxwell::VertexAttribute::Type Type() const noexcept {
             return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
         }
 
-        constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
+        Maxwell::VertexAttribute::Size Size() const noexcept {
             return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
         }
     };
@@ -187,7 +187,10 @@ struct FixedPipelineState {
         u32 raw2;
         BitField<0, 3, u32> alpha_test_func;
         BitField<3, 1, u32> early_z;
+        BitField<4, 1, u32> depth_enabled;
+        BitField<5, 5, u32> depth_format;
     };
+    std::array<u8, Maxwell::NumRenderTargets> color_formats;
 
     u32 alpha_test_ref;
     u32 point_size;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f088447e94..dc4ff0da2b 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti
     return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
 }
 
+// Maps Tegra MSAA modes to Vulkan sample counts; the D3D and virtual-coverage
+// (VC) variants use the sample count of their base X-by-Y mode.
+VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
+    switch (msaa_mode) {
+    case Tegra::Texture::MsaaMode::Msaa1x1:
+        return VK_SAMPLE_COUNT_1_BIT;
+    case Tegra::Texture::MsaaMode::Msaa2x1:
+    case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
+        return VK_SAMPLE_COUNT_2_BIT;
+    case Tegra::Texture::MsaaMode::Msaa2x2:
+    case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
+    case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
+        return VK_SAMPLE_COUNT_4_BIT;
+    case Tegra::Texture::MsaaMode::Msaa4x2:
+    case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
+    case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
+    case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
+        return VK_SAMPLE_COUNT_8_BIT;
+    case Tegra::Texture::MsaaMode::Msaa4x4:
+        return VK_SAMPLE_COUNT_16_BIT;
+    default:
+        UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
+        return VK_SAMPLE_COUNT_1_BIT;
+    }
+}
+
 } // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index e3e06ba38a..9f78e15b6a 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
 
 VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
 
+VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
+
 } // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
new file mode 100644
index 0000000000..0a59aa6593
--- /dev/null
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -0,0 +1,162 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include <boost/container/small_vector.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan {
+
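+// Unpacks the image and sampler indices from a texture handle word. When
+// textures are addressed via the header index, the raw value indexes both the
+// TIC and TSC tables; otherwise the word packs separate tic_id and tsc_id
+// bitfields.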
+struct TextureHandle {
+    explicit TextureHandle(u32 data, bool via_header_index) {
+        if (via_header_index) [[likely]] {
+            image = data;
+            sampler = data;
+        } else {
+            const Tegra::Texture::TextureHandle handle{data};
+            image = handle.tic_id;
+            sampler = handle.tsc_id.Value();
+        }
+    }
+
+    u32 image;
+    u32 sampler;
+};
+
+struct DescriptorLayoutTuple {
+    vk::DescriptorSetLayout descriptor_set_layout;
+    vk::PipelineLayout pipeline_layout;
+    vk::DescriptorUpdateTemplateKHR descriptor_update_template;
+};
+
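+// Accumulates descriptor bindings and matching update-template entries in the
+// order stages declare them, then creates the set layout, pipeline layout and
+// update template in a single Create() pass.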
+class DescriptorLayoutBuilder {
+public:
+    DescriptorLayoutTuple Create(const vk::Device& device) {
+        DescriptorLayoutTuple result;
+        if (!bindings.empty()) {
+            result.descriptor_set_layout = device.CreateDescriptorSetLayout({
+                .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .bindingCount = static_cast<u32>(bindings.size()),
+                .pBindings = bindings.data(),
+            });
+        }
+        result.pipeline_layout = device.CreatePipelineLayout({
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .setLayoutCount = result.descriptor_set_layout ? 1U : 0U,
+            .pSetLayouts =
+                result.descriptor_set_layout ? result.descriptor_set_layout.address() : nullptr,
+            .pushConstantRangeCount = 0,
+            .pPushConstantRanges = nullptr,
+        });
+        if (!entries.empty()) {
+            result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({
+                .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+                .pNext = nullptr,
+                .flags = 0,
+                .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
+                .pDescriptorUpdateEntries = entries.data(),
+                .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+                .descriptorSetLayout = *result.descriptor_set_layout,
+                .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+                .pipelineLayout = *result.pipeline_layout,
+                .set = 0,
+            });
+        }
+        return result;
+    }
+
+    void Add(const Shader::Info& info, VkShaderStageFlags stage) {
+        for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
+            Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage);
+        }
+        for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
+            Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage);
+        }
+        for ([[maybe_unused]] const auto& desc : info.texture_descriptors) {
+            Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage);
+        }
+    }
+
+private:
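+    // Appends one binding and one update-template entry; the binding number and
+    // template offset advance together so layout and template stay in sync.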
+    void Add(VkDescriptorType type, VkShaderStageFlags stage) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = type,
+            .descriptorCount = 1,
+            .stageFlags = stage,
+            .pImmutableSamplers = nullptr,
+        });
+        entries.push_back(VkDescriptorUpdateTemplateEntryKHR{
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = type,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
+
+    boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
+    boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
+    u32 binding{};
+    size_t offset{};
+};
+
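+// Maps the recompiler's texture type to the texture cache's image view type;
+// shadow variants share the view dimensionality of their color counterparts.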
+inline VideoCommon::ImageViewType CastType(Shader::TextureType type) {
+    switch (type) {
+    case Shader::TextureType::Color1D:
+    case Shader::TextureType::Shadow1D:
+        return VideoCommon::ImageViewType::e1D;
+    case Shader::TextureType::ColorArray1D:
+    case Shader::TextureType::ShadowArray1D:
+        return VideoCommon::ImageViewType::e1DArray;
+    case Shader::TextureType::Color2D:
+    case Shader::TextureType::Shadow2D:
+        return VideoCommon::ImageViewType::e2D;
+    case Shader::TextureType::ColorArray2D:
+    case Shader::TextureType::ShadowArray2D:
+        return VideoCommon::ImageViewType::e2DArray;
+    case Shader::TextureType::Color3D:
+    case Shader::TextureType::Shadow3D:
+        return VideoCommon::ImageViewType::e3D;
+    case Shader::TextureType::ColorCube:
+    case Shader::TextureType::ShadowCube:
+        return VideoCommon::ImageViewType::Cube;
+    case Shader::TextureType::ColorArrayCube:
+    case Shader::TextureType::ShadowArrayCube:
+        return VideoCommon::ImageViewType::CubeArray;
+    }
+    UNREACHABLE_MSG("Invalid texture type {}", type);
+    return {};
+}
+
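+// Pushes one sampled-image descriptor per texture declared in the shader,
+// advancing a shared index so multiple stages can append to the same queue.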
+inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers,
+                                 const ImageId* image_view_ids, TextureCache& texture_cache,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) {
+    for (const auto& desc : info.texture_descriptors) {
+        const VkSampler sampler{samplers[index]};
+        ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
+        const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))};
+        update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+        ++index;
+    }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index ef8bef6ffc..6684d37a6e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -6,6 +6,7 @@
 
 #include <boost/container/small_vector.hpp>
 
+#include "video_core/renderer_vulkan/pipeline_helper.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
@@ -17,140 +18,10 @@
 
 namespace Vulkan {
 namespace {
-vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) {
-    boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings;
-    u32 binding{};
-    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
-        bindings.push_back({
-            .binding = binding,
-            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
-            .descriptorCount = 1,
-            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-            .pImmutableSamplers = nullptr,
-        });
-        ++binding;
-    }
-    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
-        bindings.push_back({
-            .binding = binding,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .descriptorCount = 1,
-            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-            .pImmutableSamplers = nullptr,
-        });
-        ++binding;
-    }
-    for (const auto& desc : info.texture_descriptors) {
-        bindings.push_back({
-            .binding = binding,
-            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-            .descriptorCount = 1,
-            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-            .pImmutableSamplers = nullptr,
-        });
-        ++binding;
-    }
-    return device.GetLogical().CreateDescriptorSetLayout({
-        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .bindingCount = static_cast<u32>(bindings.size()),
-        .pBindings = bindings.data(),
-    });
-}
-
-vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
-    const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout,
-    VkPipelineLayout pipeline_layout) {
-    boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries;
-    size_t offset{};
-    u32 binding{};
-    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
-        entries.push_back({
-            .dstBinding = binding,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
-            .offset = offset,
-            .stride = sizeof(DescriptorUpdateEntry),
-        });
-        ++binding;
-        offset += sizeof(DescriptorUpdateEntry);
-    }
-    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
-        entries.push_back({
-            .dstBinding = binding,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .offset = offset,
-            .stride = sizeof(DescriptorUpdateEntry),
-        });
-        ++binding;
-        offset += sizeof(DescriptorUpdateEntry);
-    }
-    for (const auto& desc : info.texture_descriptors) {
-        entries.push_back({
-            .dstBinding = binding,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-            .offset = offset,
-            .stride = sizeof(DescriptorUpdateEntry),
-        });
-        ++binding;
-        offset += sizeof(DescriptorUpdateEntry);
-    }
-    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
-        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
-        .pDescriptorUpdateEntries = entries.data(),
-        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
-        .descriptorSetLayout = descriptor_set_layout,
-        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
-        .pipelineLayout = pipeline_layout,
-        .set = 0,
-    });
-}
-
-struct TextureHandle {
-    explicit TextureHandle(u32 data, bool via_header_index) {
-        const Tegra::Texture::TextureHandle handle{data};
-        image = handle.tic_id;
-        sampler = via_header_index ? image : handle.tsc_id.Value();
-    }
-
-    u32 image;
-    u32 sampler;
-};
-
-VideoCommon::ImageViewType CastType(Shader::TextureType type) {
-    switch (type) {
-    case Shader::TextureType::Color1D:
-    case Shader::TextureType::Shadow1D:
-        return VideoCommon::ImageViewType::e1D;
-    case Shader::TextureType::ColorArray1D:
-    case Shader::TextureType::ShadowArray1D:
-        return VideoCommon::ImageViewType::e1DArray;
-    case Shader::TextureType::Color2D:
-    case Shader::TextureType::Shadow2D:
-        return VideoCommon::ImageViewType::e2D;
-    case Shader::TextureType::ColorArray2D:
-    case Shader::TextureType::ShadowArray2D:
-        return VideoCommon::ImageViewType::e2DArray;
-    case Shader::TextureType::Color3D:
-    case Shader::TextureType::Shadow3D:
-        return VideoCommon::ImageViewType::e3D;
-    case Shader::TextureType::ColorCube:
-    case Shader::TextureType::ShadowCube:
-        return VideoCommon::ImageViewType::Cube;
-    case Shader::TextureType::ColorArrayCube:
-    case Shader::TextureType::ShadowArrayCube:
-        return VideoCommon::ImageViewType::CubeArray;
-    }
-    UNREACHABLE_MSG("Invalid texture type {}", type);
+DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) {
+    DescriptorLayoutBuilder builder;
+    builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
+    return builder.Create(device.GetLogical());
 }
 } // Anonymous namespace
 
@@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip
                                  VKUpdateDescriptorQueue& update_descriptor_queue_,
                                  const Shader::Info& info_, vk::ShaderModule spv_module_)
     : update_descriptor_queue{&update_descriptor_queue_}, info{info_},
-      spv_module(std::move(spv_module_)),
-      descriptor_set_layout(CreateDescriptorSetLayout(device, info)),
-      descriptor_allocator(descriptor_pool, *descriptor_set_layout),
-      pipeline_layout{device.GetLogical().CreatePipelineLayout({
-          .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-          .pNext = nullptr,
-          .flags = 0,
-          .setLayoutCount = 1,
-          .pSetLayouts = descriptor_set_layout.address(),
-          .pushConstantRangeCount = 0,
-          .pPushConstantRanges = nullptr,
-      })},
-      descriptor_update_template{
-          CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)},
-      pipeline{device.GetLogical().CreateComputePipeline({
-          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
-          .pNext = nullptr,
-          .flags = 0,
-          .stage{
-              .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-              .pNext = nullptr,
-              .flags = 0,
-              .stage = VK_SHADER_STAGE_COMPUTE_BIT,
-              .module = *spv_module,
-              .pName = "main",
-              .pSpecializationInfo = nullptr,
-          },
-          .layout = *pipeline_layout,
-          .basePipelineHandle = 0,
-          .basePipelineIndex = 0,
-      })} {}
+      spv_module(std::move(spv_module_)) {
+    DescriptorLayoutTuple tuple{CreateLayout(device, info)};
+    descriptor_set_layout = std::move(tuple.descriptor_set_layout);
+    pipeline_layout = std::move(tuple.pipeline_layout);
+    descriptor_update_template = std::move(tuple.descriptor_update_template);
+    descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
+
+    pipeline = device.GetLogical().CreateComputePipeline({
+        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stage{
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+            .module = *spv_module,
+            .pName = "main",
+            .pSpecializationInfo = nullptr,
+        },
+        .layout = *pipeline_layout,
+        .basePipelineHandle = 0,
+        .basePipelineIndex = 0,
+    });
+}
 
 void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
     buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask);
@@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple
     static constexpr size_t max_elements = 64;
     std::array<ImageId, max_elements> image_view_ids;
     boost::container::static_vector<u32, max_elements> image_view_indices;
-    boost::container::static_vector<VkSampler, max_elements> sampler_handles;
+    boost::container::static_vector<VkSampler, max_elements> samplers;
 
     const auto& launch_desc{kepler_compute.launch_description};
     const auto& cbufs{launch_desc.const_buffer_config};
@@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple
         image_view_indices.push_back(handle.image);
 
         Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
-        sampler_handles.push_back(sampler->Handle());
+        samplers.push_back(sampler->Handle());
     }
-
     const std::span indices_span(image_view_indices.data(), image_view_indices.size());
     texture_cache.FillComputeImageViews(indices_span, image_view_ids);
 
     size_t index{};
-    for (const auto& desc : info.texture_descriptors) {
-        const VkSampler vk_sampler{sampler_handles[index]};
-        ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
-        const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))};
-        update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler);
-        ++index;
-    }
+    PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache,
+                         *update_descriptor_queue, index);
 }
 
 VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 08d73a2a4b..e82e5816b8 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -9,7 +9,6 @@
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_pipeline.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -18,7 +17,7 @@ namespace Vulkan {
 
 class Device;
 
-class ComputePipeline : public Pipeline {
+class ComputePipeline {
 public:
     explicit ComputePipeline() = default;
     explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
new file mode 100644
index 0000000000..a2ec418b12
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -0,0 +1,445 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <span>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
+
+#include "common/bit_field.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/pipeline_helper.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+
+namespace Vulkan {
+namespace {
+using boost::container::small_vector;
+using boost::container::static_vector;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+
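+// Merges the resource declarations of all graphics stages into one descriptor
+// set layout, assigning bindings in stage order.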
+DescriptorLayoutTuple CreateLayout(const Device& device, std::span<const Shader::Info> infos) {
+    DescriptorLayoutBuilder builder;
+    for (size_t index = 0; index < infos.size(); ++index) {
+        static constexpr std::array stages{
+            VK_SHADER_STAGE_VERTEX_BIT,
+            VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+            VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+            VK_SHADER_STAGE_GEOMETRY_BIT,
+            VK_SHADER_STAGE_FRAGMENT_BIT,
+        };
+        builder.Add(infos[index], stages.at(index));
+    }
+    return builder.Create(device.GetLogical());
+}
+
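+// Builds the static stencil state for one face. Compare and write masks and
+// the reference value are zeroed here because they are set as dynamic state.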
+template <class StencilFace>
+VkStencilOpState GetStencilFaceState(const StencilFace& face) {
+    return {
+        .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()),
+        .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()),
+        .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()),
+        .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()),
+        .compareMask = 0,
+        .writeMask = 0,
+        .reference = 0,
+    };
+}
+
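+// Vulkan requires primitiveRestartEnable to be false for list and patch
+// topologies; only strip and fan topologies honor the restart index.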
+bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
+    static constexpr std::array unsupported_topologies{
+        VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
+        VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
+        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+        VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
+        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
+        VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
+        // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
+    };
+    return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end();
+}
+
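+// Expands the 16-bit packed swizzle (4 bits per component, 3 significant) into
+// the NV viewport swizzle structure.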
+VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
+    union Swizzle {
+        u32 raw;
+        BitField<0, 3, Maxwell::ViewportSwizzle> x;
+        BitField<4, 3, Maxwell::ViewportSwizzle> y;
+        BitField<8, 3, Maxwell::ViewportSwizzle> z;
+        BitField<12, 3, Maxwell::ViewportSwizzle> w;
+    };
+    const Swizzle unpacked{swizzle};
+    return VkViewportSwizzleNV{
+        .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
+        .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
+        .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
+        .w = MaxwellToVK::ViewportSwizzle(unpacked.w),
+    };
+}
+
+PixelFormat DecodeFormat(u8 encoded_format) {
+    const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)};
+    if (format == Tegra::RenderTargetFormat::NONE) {
+        return PixelFormat::Invalid;
+    }
+    return PixelFormatFromRenderTargetFormat(format);
+}
+
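+// Translates the packed guest formats and MSAA mode in the fixed state into a
+// render pass cache key; disabled depth yields PixelFormat::Invalid.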
+RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
+    RenderPassKey key;
+    std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat);
+    if (state.depth_enabled != 0) {
+        const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())};
+        key.depth_format = PixelFormatFromDepthFormat(depth_format);
+    } else {
+        key.depth_format = PixelFormat::Invalid;
+    }
+    key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
+    return key;
+}
+} // Anonymous namespace
+
+GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
+                                   Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_,
+                                   BufferCache& buffer_cache_, TextureCache& texture_cache_,
+                                   const Device& device, VKDescriptorPool& descriptor_pool,
+                                   VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                   RenderPassCache& render_pass_cache,
+                                   const FixedPipelineState& state,
+                                   std::array<vk::ShaderModule, NUM_STAGES> stages,
+                                   const std::array<const Shader::Info*, NUM_STAGES>& infos)
+    : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_},
+      buffer_cache{&buffer_cache_}, scheduler{&scheduler_},
+      update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} {
+    std::ranges::transform(infos, stage_infos.begin(),
+                           [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
+
+    DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)};
+    descriptor_set_layout = std::move(tuple.descriptor_set_layout);
+    pipeline_layout = std::move(tuple.pipeline_layout);
+    descriptor_update_template = std::move(tuple.descriptor_update_template);
+    descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
+
+    const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))};
+    MakePipeline(device, state, render_pass);
+}
+
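+// Binds all resources the pipeline needs for the next draw: per-stage uniform
+// and storage buffers, texture handles read from const buffers, and the
+// descriptor set written through the update template.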
+void GraphicsPipeline::Configure(bool is_indexed) {
+    static constexpr size_t max_images_elements = 64;
+    std::array<ImageId, max_images_elements> image_view_ids;
+    static_vector<u32, max_images_elements> image_view_indices;
+    static_vector<VkSampler, max_images_elements> samplers;
+
+    texture_cache->SynchronizeGraphicsDescriptors();
+    texture_cache->UpdateRenderTargets(false);
+
+    const auto& regs{maxwell3d->regs};
+    const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+        const Shader::Info& info{stage_infos[stage]};
+        buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
+        buffer_cache->UnbindGraphicsStorageBuffers(stage);
+        size_t index{};
+        for (const auto& desc : info.storage_buffers_descriptors) {
+            ASSERT(desc.count == 1);
+            buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset,
+                                                    true);
+            ++index;
+        }
+        const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers};
+        for (const auto& desc : info.texture_descriptors) {
+            const u32 cbuf_index{desc.cbuf_index};
+            const u32 cbuf_offset{desc.cbuf_offset};
+            ASSERT(cbufs[cbuf_index].enabled);
+            const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset};
+            const u32 raw_handle{gpu_memory->Read<u32>(addr)};
+
+            const TextureHandle handle(raw_handle, via_header_index);
+            image_view_indices.push_back(handle.image);
+
+            Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)};
+            samplers.push_back(sampler->Handle());
+        }
+    }
+    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+    buffer_cache->UpdateGraphicsBuffers(is_indexed);
+    texture_cache->FillGraphicsImageViews(indices_span, image_view_ids);
+
+    buffer_cache->BindHostGeometryBuffers(is_indexed);
+
+    size_t index{};
+    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+        buffer_cache->BindHostStageBuffers(stage);
+        PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(),
+                             *texture_cache, *update_descriptor_queue, index);
+    }
+    const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+    update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
+
+    scheduler->BindGraphicsPipeline(*pipeline);
+    scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
+                                  nullptr);
+    });
+}
+
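+// Builds the monolithic VkPipeline from the fixed state. State covered by
+// VK_EXT_extended_dynamic_state is left zeroed here and set dynamically
+// instead.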
+void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state,
+                                    VkRenderPass render_pass) {
+    FixedPipelineState::DynamicState dynamic{};
+    if (!device.IsExtExtendedDynamicStateSupported()) {
+        dynamic = state.dynamic_state;
+    }
+    static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
+    static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
+    for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+        const bool instanced = state.binding_divisors[index] != 0;
+        const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
+        vertex_bindings.push_back({
+            .binding = static_cast<u32>(index),
+            .stride = dynamic.vertex_strides[index],
+            .inputRate = rate,
+        });
+        if (instanced) {
+            vertex_binding_divisors.push_back({
+                .binding = static_cast<u32>(index),
+                .divisor = state.binding_divisors[index],
+            });
+        }
+    }
+    static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
+    const auto& input_attributes = stage_infos[0].loads_generics;
+    for (size_t index = 0; index < state.attributes.size(); ++index) {
+        const auto& attribute = state.attributes[index];
+        if (!attribute.enabled || !input_attributes[index]) {
+            continue;
+        }
+        vertex_attributes.push_back({
+            .location = static_cast<u32>(index),
+            .binding = attribute.buffer,
+            .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
+            .offset = attribute.offset,
+        });
+    }
+    VkPipelineVertexInputStateCreateInfo vertex_input_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
+        .pVertexBindingDescriptions = vertex_bindings.data(),
+        .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
+        .pVertexAttributeDescriptions = vertex_attributes.data(),
+    };
+    const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
+        .pNext = nullptr,
+        .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()),
+        .pVertexBindingDivisors = vertex_binding_divisors.data(),
+    };
+    if (!vertex_binding_divisors.empty()) {
+        vertex_input_ci.pNext = &input_divisor_ci;
+    }
+    const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
+    const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .topology = input_assembly_topology,
+        .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
+                                  SupportsPrimitiveRestart(input_assembly_topology),
+    };
+    const VkPipelineTessellationStateCreateInfo tessellation_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
+    };
+    VkPipelineViewportStateCreateInfo viewport_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .viewportCount = Maxwell::NumViewports,
+        .pViewports = nullptr,
+        .scissorCount = Maxwell::NumViewports,
+        .pScissors = nullptr,
+    };
+    std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
+    std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
+    VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
+        .pNext = nullptr,
+        .flags = 0,
+        .viewportCount = Maxwell::NumViewports,
+        .pViewportSwizzles = swizzles.data(),
+    };
+    if (device.IsNvViewportSwizzleSupported()) {
+        viewport_ci.pNext = &swizzle_ci;
+    }
+
+    const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .depthClampEnable =
+            static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
+        .rasterizerDiscardEnable =
+            static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
+        .polygonMode = VK_POLYGON_MODE_FILL,
+        .cullMode = static_cast<VkCullModeFlags>(
+            dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
+        .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
+        .depthBiasEnable = state.depth_bias_enable,
+        .depthBiasConstantFactor = 0.0f,
+        .depthBiasClamp = 0.0f,
+        .depthBiasSlopeFactor = 0.0f,
+        .lineWidth = 1.0f,
+    };
+    const VkPipelineMultisampleStateCreateInfo multisample_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode),
+        .sampleShadingEnable = VK_FALSE,
+        .minSampleShading = 0.0f,
+        .pSampleMask = nullptr,
+        .alphaToCoverageEnable = VK_FALSE,
+        .alphaToOneEnable = VK_FALSE,
+    };
+    const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .depthTestEnable = dynamic.depth_test_enable,
+        .depthWriteEnable = dynamic.depth_write_enable,
+        .depthCompareOp = dynamic.depth_test_enable
+                              ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
+                              : VK_COMPARE_OP_ALWAYS,
+        .depthBoundsTestEnable = dynamic.depth_bounds_enable,
+        .stencilTestEnable = dynamic.stencil_enable,
+        .front = GetStencilFaceState(dynamic.front),
+        .back = GetStencilFaceState(dynamic.back),
+        .minDepthBounds = 0.0f,
+        .maxDepthBounds = 0.0f,
+    };
+    static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
+    for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+        static constexpr std::array mask_table{
+            VK_COLOR_COMPONENT_R_BIT,
+            VK_COLOR_COMPONENT_G_BIT,
+            VK_COLOR_COMPONENT_B_BIT,
+            VK_COLOR_COMPONENT_A_BIT,
+        };
+        const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
+        if (format == Tegra::RenderTargetFormat::NONE) {
+            continue;
+        }
+        const auto& blend{state.attachments[index]};
+        const std::array mask{blend.Mask()};
+        VkColorComponentFlags write_mask{};
+        for (size_t i = 0; i < mask_table.size(); ++i) {
+            write_mask |= mask[i] ? mask_table[i] : 0;
+        }
+        cb_attachments.push_back({
+            .blendEnable = blend.enable != 0,
+            .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
+            .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
+            .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
+            .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
+            .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
+            .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
+            .colorWriteMask = write_mask,
+        });
+    }
+    const VkPipelineColorBlendStateCreateInfo color_blend_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .logicOpEnable = VK_FALSE,
+        .logicOp = VK_LOGIC_OP_COPY,
+        .attachmentCount = static_cast<u32>(cb_attachments.size()),
+        .pAttachments = cb_attachments.data(),
+        .blendConstants = {},
+    };
+    static_vector<VkDynamicState, 17> dynamic_states{
+        VK_DYNAMIC_STATE_VIEWPORT,           VK_DYNAMIC_STATE_SCISSOR,
+        VK_DYNAMIC_STATE_DEPTH_BIAS,         VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+        VK_DYNAMIC_STATE_DEPTH_BOUNDS,       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+        VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+    };
+    if (device.IsExtExtendedDynamicStateSupported()) {
+        static constexpr std::array extended{
+            VK_DYNAMIC_STATE_CULL_MODE_EXT,
+            VK_DYNAMIC_STATE_FRONT_FACE_EXT,
+            VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
+            VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
+            VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
+            VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
+            VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
+            VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
+            VK_DYNAMIC_STATE_STENCIL_OP_EXT,
+        };
+        dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
+    }
+    const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+        .pDynamicStates = dynamic_states.data(),
+    };
+    // Only referenced by the disabled subgroup-size chain below.
+    [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+        .pNext = nullptr,
+        .requiredSubgroupSize = GuestWarpSize,
+    };
+    static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages;
+    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+        if (!spv_modules[stage]) {
+            continue;
+        }
+        [[maybe_unused]] auto& stage_ci =
+            shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
+                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
+                .module = *spv_modules[stage],
+                .pName = "main",
+                .pSpecializationInfo = nullptr,
+            });
+        /*
+        if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
+            stage_ci.pNext = &subgroup_size_ci;
+        }
+        */
+    }
+    pipeline = device.GetLogical().CreateGraphicsPipeline({
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .stageCount = static_cast<u32>(shader_stages.size()),
+        .pStages = shader_stages.data(),
+        .pVertexInputState = &vertex_input_ci,
+        .pInputAssemblyState = &input_assembly_ci,
+        .pTessellationState = &tessellation_ci,
+        .pViewportState = &viewport_ci,
+        .pRasterizationState = &rasterization_ci,
+        .pMultisampleState = &multisample_ci,
+        .pDepthStencilState = &depth_stencil_ci,
+        .pColorBlendState = &color_blend_ci,
+        .pDynamicState = &dynamic_state_ci,
+        .layout = *pipeline_layout,
+        .renderPass = render_pass,
+        .subpass = 0,
+        .basePipelineHandle = nullptr,
+        .basePipelineIndex = 0,
+    });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
new file mode 100644
index 0000000000..ba1d34a837
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include "shader_recompiler/shader_info.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Device;
+class RenderPassCache;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
+class GraphicsPipeline {
+    static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
+
+public:
+    explicit GraphicsPipeline() = default;
+    explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d,
+                              Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
+                              BufferCache& buffer_cache,
+                              TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool,
+                              VKUpdateDescriptorQueue& update_descriptor_queue,
+                              RenderPassCache& render_pass_cache, const FixedPipelineState& state,
+                              std::array<vk::ShaderModule, NUM_STAGES> stages,
+                              const std::array<const Shader::Info*, NUM_STAGES>& infos);
+
+    void Configure(bool is_indexed);
+
+    GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default;
+    GraphicsPipeline(GraphicsPipeline&&) noexcept = default;
+
+    GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
+    GraphicsPipeline(const GraphicsPipeline&) = delete;
+
+private:
+    void MakePipeline(const Device& device, const FixedPipelineState& state,
+                      VkRenderPass render_pass);
+
+    Tegra::Engines::Maxwell3D* maxwell3d{};
+    Tegra::MemoryManager* gpu_memory{};
+    TextureCache* texture_cache{};
+    BufferCache* buffer_cache{};
+    VKScheduler* scheduler{};
+    VKUpdateDescriptorQueue* update_descriptor_queue{};
+
+    std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
+    std::array<Shader::Info, NUM_STAGES> stage_infos;
+    vk::DescriptorSetLayout descriptor_set_layout;
+    DescriptorAllocator descriptor_allocator;
+    vk::PipelineLayout pipeline_layout;
+    vk::DescriptorUpdateTemplateKHR descriptor_update_template;
+    vk::Pipeline pipeline;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h
deleted file mode 100644
index b062884035..0000000000
--- a/src/video_core/renderer_vulkan/vk_pipeline.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
-namespace Vulkan {
-
-class Pipeline {
-public:
-    /// Add a reference count to the pipeline
-    void AddRef() noexcept {
-        ++ref_count;
-    }
-
-    [[nodiscard]] bool RemoveRef() noexcept {
-        --ref_count;
-        return ref_count == 0;
-    }
-
-    [[nodiscard]] u64 UsageTick() const noexcept {
-        return usage_tick;
-    }
-
-protected:
-    u64 usage_tick{};
-
-private:
-    size_t ref_count{};
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 5477a2903b..c9da2080d4 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -12,8 +12,11 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
 #include "shader_recompiler/environment.h"
-#include "shader_recompiler/recompiler.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/program.h"
+#include "shader_recompiler/program_header.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
@@ -34,18 +37,18 @@
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
-using Tegra::Engines::ShaderType;
-
 namespace {
-class Environment final : public Shader::Environment {
+using Shader::Backend::SPIRV::EmitSPIRV;
+
+class GenericEnvironment : public Shader::Environment {
 public:
-    explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
-        : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {}
+    explicit GenericEnvironment() = default;
+    explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
+        : gpu_memory{&gpu_memory_}, program_base{program_base_} {}
 
-    ~Environment() override = default;
+    ~GenericEnvironment() override = default;
 
-    [[nodiscard]] std::optional<u128> Analyze(u32 start_address) {
+    std::optional<u128> Analyze(u32 start_address) {
-        const std::optional<u64> size{TryFindSize(start_address)};
+        const std::optional<u64> size{TryFindSize(program_base + start_address)};
         if (!size) {
             return std::nullopt;
@@ -55,52 +58,47 @@ public:
         return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size());
     }
 
-    [[nodiscard]] size_t ShaderSize() const noexcept {
+    [[nodiscard]] size_t CachedSize() const noexcept {
+        return cached_highest - cached_lowest + INST_SIZE;
+    }
+
+    [[nodiscard]] size_t ReadSize() const noexcept {
         return read_highest - read_lowest + INST_SIZE;
     }
 
-    [[nodiscard]] u128 ComputeHash() const {
-        const size_t size{ShaderSize()};
+    [[nodiscard]] u128 CalculateHash() const {
+        const size_t size{ReadSize()};
         auto data = std::make_unique<u64[]>(size);
-        gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size);
+        gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
         return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size);
     }
 
-    u64 ReadInstruction(u32 address) override {
+    u64 ReadInstruction(u32 address) final {
         read_lowest = std::min(read_lowest, address);
         read_highest = std::max(read_highest, address);
 
         if (address >= cached_lowest && address < cached_highest) {
             return code[address / INST_SIZE];
         }
-        return gpu_memory.Read<u64>(program_base + address);
-    }
-
-    u32 TextureBoundBuffer() override {
-        return kepler_compute.regs.tex_cb_index;
-    }
-
-    std::array<u32, 3> WorkgroupSize() override {
-        const auto& qmd{kepler_compute.launch_description};
-        return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
+        return gpu_memory->Read<u64>(program_base + address);
     }
 
-private:
+protected:
     static constexpr size_t INST_SIZE = sizeof(u64);
-    static constexpr size_t BLOCK_SIZE = 0x1000;
-    static constexpr size_t MAXIMUM_SIZE = 0x100000;
 
-    static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
-    static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
+    std::optional<u64> TryFindSize(GPUVAddr guest_addr) {
+        constexpr size_t BLOCK_SIZE = 0x1000;
+        constexpr size_t MAXIMUM_SIZE = 0x100000;
+
+        constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
+        constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
 
-    std::optional<u64> TryFindSize(u32 start_address) {
-        GPUVAddr guest_addr = program_base + start_address;
         size_t offset = 0;
         size_t size = BLOCK_SIZE;
         while (size <= MAXIMUM_SIZE) {
             code.resize(size / INST_SIZE);
             u64* const data = code.data() + offset / INST_SIZE;
-            gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE);
+            gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
             for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
                 const u64 inst = data[i / INST_SIZE];
                 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
@@ -114,17 +112,87 @@ private:
         return std::nullopt;
     }
 
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
-    GPUVAddr program_base;
+    Tegra::MemoryManager* gpu_memory{};
+    GPUVAddr program_base{};
+
+    std::vector<u64> code;
 
-    u32 read_lowest = 0;
+    u32 read_lowest = std::numeric_limits<u32>::max();
     u32 read_highest = 0;
 
-    std::vector<u64> code;
     u32 cached_lowest = std::numeric_limits<u32>::max();
     u32 cached_highest = 0;
 };
+
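+// Environment for graphics stages: reads the shader program header (SPH) and
+// maps the Maxwell program slot to the recompiler stage.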
+class GraphicsEnvironment final : public GenericEnvironment {
+public:
+    explicit GraphicsEnvironment() = default;
+    explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
+                                 Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program,
+                                 GPUVAddr program_base_, u32 start_offset)
+        : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} {
+        gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph));
+        switch (program) {
+        case Maxwell::ShaderProgram::VertexA:
+            stage = Shader::Stage::VertexA;
+            break;
+        case Maxwell::ShaderProgram::VertexB:
+            stage = Shader::Stage::VertexB;
+            break;
+        case Maxwell::ShaderProgram::TesselationControl:
+            stage = Shader::Stage::TessellationControl;
+            break;
+        case Maxwell::ShaderProgram::TesselationEval:
+            stage = Shader::Stage::TessellationEval;
+            break;
+        case Maxwell::ShaderProgram::Geometry:
+            stage = Shader::Stage::Geometry;
+            break;
+        case Maxwell::ShaderProgram::Fragment:
+            stage = Shader::Stage::Fragment;
+            break;
+        default:
+            UNREACHABLE_MSG("Invalid program={}", program);
+        }
+    }
+
+    ~GraphicsEnvironment() override = default;
+
+    u32 TextureBoundBuffer() override {
+        return maxwell3d->regs.tex_cb_index;
+    }
+
+    std::array<u32, 3> WorkgroupSize() override {
+        throw Shader::LogicError("Requesting workgroup size in a graphics stage");
+    }
+
+private:
+    Tegra::Engines::Maxwell3D* maxwell3d{};
+};
+
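+// Environment for compute: workgroup size and the bound texture const buffer
+// come from the KeplerCompute launch description and registers.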
+class ComputeEnvironment final : public GenericEnvironment {
+public:
+    explicit ComputeEnvironment() = default;
+    explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
+                                Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
+        : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} {
+        stage = Shader::Stage::Compute;
+    }
+
+    ~ComputeEnvironment() override = default;
+
+    u32 TextureBoundBuffer() override {
+        return kepler_compute->regs.tex_cb_index;
+    }
+
+    std::array<u32, 3> WorkgroupSize() override {
+        const auto& qmd{kepler_compute->launch_description};
+        return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
+    }
+
+private:
+    Tegra::Engines::KeplerCompute* kepler_compute{};
+};
 } // Anonymous namespace
 
 size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
     return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 
+size_t GraphicsPipelineCacheKey::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
+    return static_cast<size_t>(hash);
+}
+
+bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
+    return std::memcmp(&rhs, this, Size()) == 0;
+}
+
 PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
                              Tegra::Engines::Maxwell3D& maxwell3d_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              Tegra::MemoryManager& gpu_memory_, const Device& device_,
                              VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
-                             VKUpdateDescriptorQueue& update_descriptor_queue_)
+                             VKUpdateDescriptorQueue& update_descriptor_queue_,
+                             RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
+                             TextureCache& texture_cache_)
     : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
-      scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
-                                                                    update_descriptor_queue_} {}
+      scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+      update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
+      buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {
+    const auto& float_control{device.FloatControlProperties()};
+    profile = Shader::Profile{
+        .unified_descriptor_binding = true,
+        .support_float_controls = true,
+        .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
+                                            VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+        .support_separate_rounding_mode =
+            float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+        .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
+        .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
+        .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
+        .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
+        .support_fp16_signed_zero_nan_preserve =
+            float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
+        .support_fp32_signed_zero_nan_preserve =
+            float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
+        .has_broken_spirv_clamp = true, // TODO: is_intel
+    };
+}
 
 PipelineCache::~PipelineCache() = default;
 
+GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
+    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+    if (!RefreshStages()) {
+        return nullptr;
+    }
+    graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
+
+    const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
+    auto& pipeline{pair->second};
+    if (!is_new) {
+        return &pipeline;
+    }
+    pipeline = CreateGraphicsPipeline();
+    return &pipeline;
+}
+
 ComputePipeline* PipelineCache::CurrentComputePipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
@@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
         return &pipeline;
     }
     pipeline = CreateComputePipeline(shader);
-    shader->compute_users.push_back(key);
     return &pipeline;
 }
 
+bool PipelineCache::RefreshStages() {
+    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+            graphics_key.unique_hashes[index] = u128{};
+            continue;
+        }
+        const auto& shader_config{maxwell3d.regs.shader_config[index]};
+        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
+        const GPUVAddr shader_addr{base_addr + shader_config.offset};
+        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+        if (!cpu_shader_addr) {
+            LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+            return false;
+        }
+        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
+        if (!shader_info) {
+            const u32 offset{shader_config.offset};
+            shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr);
+        }
+        graphics_key.unique_hashes[index] = shader_info->unique_hash;
+    }
+    return true;
+}
+
+const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr,
+                                                u32 start_address, VAddr cpu_addr) {
+    GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+    auto info = std::make_unique<ShaderInfo>();
+    if (const std::optional<u128> cached_hash{env.Analyze(start_address)}) {
+        info->unique_hash = *cached_hash;
+        info->size_bytes = env.CachedSize();
+    } else {
+        // Slow path, rarely hit in commercial games.
+        // Build a control flow graph to determine the real shader size: walking
+        // the CFG makes the environment track the address range it reads.
+        flow_block_pool.ReleaseContents();
+        [[maybe_unused]] const Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
+        info->unique_hash = env.CalculateHash();
+        info->size_bytes = env.ReadSize();
+    }
+    const size_t size_bytes{info->size_bytes};
+    const ShaderInfo* const result{info.get()};
+    Register(std::move(info), cpu_addr, size_bytes);
+    return result;
+}
+
+GraphicsPipeline PipelineCache::CreateGraphicsPipeline() {
+    flow_block_pool.ReleaseContents();
+    inst_pool.ReleaseContents();
+    block_pool.ReleaseContents();
+
+    std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> envs;
+    std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+
+    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        if (graphics_key.unique_hashes[index] == u128{}) {
+            continue;
+        }
+        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
+        GraphicsEnvironment& env{envs[index]};
+        const u32 start_address{maxwell3d.regs.shader_config[index].offset};
+        env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+
+        const u32 cfg_offset{static_cast<u32>(start_address + sizeof(Shader::ProgramHeader))};
+        Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset);
+        programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg);
+    }
+    std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
+    std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
+
+    u32 binding{0};
+    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        if (graphics_key.unique_hashes[index] == u128{}) {
+            continue;
+        }
+        UNIMPLEMENTED_IF(index == 0);
+
+        GraphicsEnvironment& env{envs[index]};
+        Shader::IR::Program& program{programs[index]};
+
+        const size_t stage_index{index - 1};
+        infos[stage_index] = &program.info;
+        std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
+
+        // Debug-only SPIR-V dump; guard fopen so a missing D:\ drive cannot crash.
+        if (FILE* const file = fopen("D:\\shader.spv", "wb")) {
+            fwrite(code.data(), 4, code.size(), file);
+            fclose(file);
+            std::system("spirv-cross --vulkan-semantics D:\\shader.spv");
+        }
+
+        modules[stage_index] = BuildShader(device, code);
+    }
+    return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device,
+                            descriptor_pool, update_descriptor_queue, render_pass_cache,
+                            graphics_key.state, std::move(modules), infos);
+}
+
 ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
     const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
     const auto& qmd{kepler_compute.launch_description};
-    Environment env{kepler_compute, gpu_memory, program_base};
+    ComputeEnvironment env{kepler_compute, gpu_memory, program_base};
     if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) {
         // TODO: Load from cache
     }
-    const auto& float_control{device.FloatControlProperties()};
-    const Shader::Profile profile{
-        .unified_descriptor_binding = true,
-        .support_float_controls = true,
-        .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
-                                            VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
-        .support_separate_rounding_mode =
-            float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
-        .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
-        .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
-        .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
-        .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
-        .support_fp16_signed_zero_nan_preserve =
-            float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
-        .support_fp32_signed_zero_nan_preserve =
-            float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
-        .has_broken_spirv_clamp = true, // TODO: is_intel
-    };
-    const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)};
+    flow_block_pool.ReleaseContents();
+    inst_pool.ReleaseContents();
+    block_pool.ReleaseContents();
+
+    Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start};
+    Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
+    u32 binding{0};
+    std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
     /*
     FILE* file = fopen("D:\\shader.spv", "wb");
     fwrite(code.data(), 4, code.size(), file);
     fclose(file);
     std::system("spirv-dis D:\\shader.spv");
     */
-    shader_info->unique_hash = env.ComputeHash();
-    shader_info->size_bytes = env.ShaderSize();
-    return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
+    shader_info->unique_hash = env.CalculateHash();
+    shader_info->size_bytes = env.ReadSize();
+    return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info,
                            BuildShader(device, code)};
 }
 
@@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_
     ShaderInfo shader;
     ComputePipeline pipeline{CreateComputePipeline(&shader)};
     const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
-    shader.compute_users.push_back(key);
-    pipeline.AddRef();
-
     const size_t size_bytes{shader.size_bytes};
     Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);
     return &compute_cache.emplace(key, std::move(pipeline)).first->second;
@@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash)
     };
 }
 
-void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
-    for (const ComputePipelineCacheKey& key : shader->compute_users) {
-        const auto it = compute_cache.find(key);
-        ASSERT(it != compute_cache.end());
-
-        Pipeline& pipeline = it->second;
-        if (pipeline.RemoveRef()) {
-            // Wait for the pipeline to be free of GPU usage before destroying it
-            scheduler.Wait(pipeline.UsageTick());
-            compute_cache.erase(it);
-        }
-    }
-}
-
 } // namespace Vulkan
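
GraphicsPipelineCacheKey::Hash above runs Common::CityHash64 over the key's raw bytes, and operator== is a memcmp over the same Size() bytes; Size() is used rather than sizeof *this because FixedPipelineState reports its own size. A minimal sketch of the idea, using a hypothetical two-field Key and FNV-1a as a stand-in for CityHash64:

    // Sketch only: Key and the FNV-1a hash are illustrative assumptions.
    #include <cstddef>
    #include <cstring>
    #include <type_traits>

    struct Key {
        unsigned long long unique_hash;
        unsigned long long state_bits;

        size_t Hash() const noexcept {
            const char* const bytes = reinterpret_cast<const char*>(this);
            unsigned long long value = 14695981039346656037ull; // FNV-1a offset basis
            for (size_t i = 0; i < sizeof *this; ++i) {
                value = (value ^ static_cast<unsigned char>(bytes[i])) * 1099511628211ull;
            }
            return static_cast<size_t>(value);
        }

        bool operator==(const Key& rhs) const noexcept {
            return std::memcmp(&rhs, this, sizeof *this) == 0;
        }
    };

    // Byte-wise hashing and memcmp equality are only sound when every bit of
    // the object is significant, i.e. there is no padding:
    static_assert(std::has_unique_object_representations_v<Key>);

This is exactly why the header below static_asserts has_unique_object_representations_v on both cache key types.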
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index eb35abc27f..60fb976dfa 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -12,11 +12,18 @@
 #include <utility>
 #include <vector>
 
-#include <boost/functional/hash.hpp>
-
 #include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/profile.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/shader_cache.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
@@ -26,13 +33,6 @@ class System;
 
 namespace Vulkan {
 
-class Device;
-class RasterizerVulkan;
-class ComputePipeline;
-class VKDescriptorPool;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
-
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ComputePipelineCacheKey {
@@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>)
 static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
 static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
 
+struct GraphicsPipelineCacheKey {
+    std::array<u128, 6> unique_hashes;
+    FixedPipelineState state;
+
+    size_t Hash() const noexcept;
+
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    size_t Size() const noexcept {
+        return sizeof(unique_hashes) + state.Size();
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
+
 } // namespace Vulkan
 
 namespace std {
@@ -63,14 +83,28 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
     }
 };
 
+template <>
+struct hash<Vulkan::GraphicsPipelineCacheKey> {
+    size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
+        return k.Hash();
+    }
+};
+
 } // namespace std
 
 namespace Vulkan {
 
+class ComputePipeline;
+class Device;
+class RasterizerVulkan;
+class RenderPassCache;
+class VKDescriptorPool;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
 struct ShaderInfo {
     u128 unique_hash{};
     size_t size_bytes{};
-    std::vector<ComputePipelineCacheKey> compute_users;
 };
 
 class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
@@ -80,15 +114,23 @@ public:
                            Tegra::Engines::KeplerCompute& kepler_compute,
                            Tegra::MemoryManager& gpu_memory, const Device& device,
                            VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
-                           VKUpdateDescriptorQueue& update_descriptor_queue);
+                           VKUpdateDescriptorQueue& update_descriptor_queue,
+                           RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
+                           TextureCache& texture_cache);
     ~PipelineCache() override;
 
-    [[nodiscard]] ComputePipeline* CurrentComputePipeline();
+    [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
 
-protected:
-    void OnShaderRemoval(ShaderInfo* shader) override;
+    [[nodiscard]] ComputePipeline* CurrentComputePipeline();
 
 private:
+    bool RefreshStages();
+
+    const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr,
+                                     u32 start_address, VAddr cpu_addr);
+
+    GraphicsPipeline CreateGraphicsPipeline();
+
     ComputePipeline CreateComputePipeline(ShaderInfo* shader);
 
     ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
@@ -104,8 +146,20 @@ private:
     VKScheduler& scheduler;
     VKDescriptorPool& descriptor_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
+    RenderPassCache& render_pass_cache;
+    BufferCache& buffer_cache;
+    TextureCache& texture_cache;
+
+    GraphicsPipelineCacheKey graphics_key{};
 
     std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
+    std::unordered_map<GraphicsPipelineCacheKey, GraphicsPipeline> graphics_cache;
+
+    Shader::ObjectPool<Shader::IR::Inst> inst_pool;
+    Shader::ObjectPool<Shader::IR::Block> block_pool;
+    Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block_pool;
+
+    Shader::Profile profile;
 };
 
 } // namespace Vulkan
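
CurrentGraphicsPipeline and CurrentComputePipeline both use try_emplace on these maps to look up and reserve a cache slot in one step, so the expensive pipeline build only runs on a miss. A sketch of the pattern under placeholder Key/Pipeline types (Key needs a std::hash specialization like the ones declared above):

    #include <unordered_map>

    struct Pipeline {};

    template <typename Key>
    Pipeline* GetOrBuild(std::unordered_map<Key, Pipeline>& cache, const Key& key) {
        // try_emplace default-constructs the slot and reports whether it is new
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            it->second = Pipeline{}; // stand-in for CreateGraphicsPipeline()
        }
        return &it->second;
    }

Returning the element's address is safe here: std::unordered_map rehashing invalidates iterators but never references or pointers to stored elements.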
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index c94419d29c..036b531b92 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       blit_image(device, scheduler, state_tracker, descriptor_pool),
       astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
                         memory_allocator),
-      texture_cache_runtime{device,       scheduler,  memory_allocator,
-                            staging_pool, blit_image, astc_decoder_pass},
+      render_pass_cache(device), texture_cache_runtime{device,           scheduler,
+                                                       memory_allocator, staging_pool,
+                                                       blit_image,       astc_decoder_pass,
+                                                       render_pass_cache},
       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                            update_descriptor_queue, descriptor_pool),
       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
       pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
-                     descriptor_pool, update_descriptor_queue),
-      query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
+                     descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
+                     texture_cache),
+      query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
       wfi_event(device.GetLogical().CreateEvent()) {
     scheduler.SetQueryCache(query_cache);
@@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
 RasterizerVulkan::~RasterizerVulkan() = default;
 
 void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
-    UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced);
+    MICROPROFILE_SCOPE(Vulkan_Drawing);
+
+    SCOPE_EXIT({ gpu.TickWork(); });
+    FlushWork();
+
+    query_cache.UpdateCounters();
+
+    GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
+    if (!pipeline) {
+        return;
+    }
+    update_descriptor_queue.Acquire();
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+    pipeline->Configure(is_indexed);
+
+    BeginTransformFeedback();
+
+    scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
+    UpdateDynamicStates();
+
+    const auto& regs{maxwell3d.regs};
+    const u32 num_instances{maxwell3d.mme_draw.instance_count};
+    const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
+    scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
+        if (draw_params.is_indexed) {
+            cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
+                               draw_params.base_vertex, draw_params.base_instance);
+        } else {
+            cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
+                        draw_params.base_vertex, draw_params.base_instance);
+        }
+    });
+    EndTransformFeedback();
 }
 
 void RasterizerVulkan::Clear() {
@@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() {
     if ((++draw_counter & 7) != 7) {
         return;
     }
-
     if (draw_counter < DRAWS_TO_DISPATCH) {
         // Send recorded tasks to the worker thread
         scheduler.DispatchWork();
         return;
     }
-
     // Otherwise (every certain number of draws) flush execution.
     // This submits commands to the Vulkan driver.
     scheduler.Flush();
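
FlushWork's counter logic batches scheduler traffic: the (++draw_counter & 7) != 7 test is true for seven out of every eight draws, so only every eighth draw dispatches recorded work, and a full driver flush happens once the counter reaches DRAWS_TO_DISPATCH. A self-contained sketch of that cadence (the threshold value and the post-flush reset are assumptions for illustration):

    #include <cstdio>

    int main() {
        constexpr unsigned DRAWS_TO_DISPATCH = 32; // placeholder threshold
        unsigned draw_counter = 0;
        for (int draw = 0; draw < 64; ++draw) {
            if ((++draw_counter & 7) != 7) {
                continue; // cheap path taken by most draws
            }
            if (draw_counter < DRAWS_TO_DISPATCH) {
                std::puts("DispatchWork(): send recorded tasks to the worker thread");
                continue;
            }
            std::puts("Flush(): submit commands to the Vulkan driver");
            draw_counter = 0; // assumed reset after a flush
        }
    }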
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 3fd03b9155..88dbd753b0 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -23,6 +23,7 @@
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -148,6 +149,7 @@ private:
     VKUpdateDescriptorQueue update_descriptor_queue;
     BlitImageHelper blit_image;
     ASTCDecoderPass astc_decoder_pass;
+    RenderPassCache render_pass_cache;
 
     TextureCacheRuntime texture_cache_runtime;
     TextureCache texture_cache;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
new file mode 100644
index 0000000000..7e5ae43ea9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -0,0 +1,100 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <unordered_map>
+
+#include <boost/container/static_vector.hpp>
+
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+namespace {
+using VideoCore::Surface::PixelFormat;
+
+constexpr std::array ATTACHMENT_REFERENCES{
+    VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
+};
+
+VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
+                                              VkSampleCountFlagBits samples) {
+    using MaxwellToVK::SurfaceFormat;
+    return {
+        .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+        .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
+        .samples = samples,
+        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+        .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+        .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+    };
+}
+} // Anonymous namespace
+
+RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
+
+VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
+    const auto [pair, is_new] = cache.try_emplace(key);
+    if (!is_new) {
+        return *pair->second;
+    }
+    boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
+    u32 num_images{0};
+
+    for (size_t index = 0; index < key.color_formats.size(); ++index) {
+        const PixelFormat format{key.color_formats[index]};
+        if (format == PixelFormat::Invalid) {
+            continue;
+        }
+        descriptions.push_back(AttachmentDescription(*device, format, key.samples));
+        ++num_images;
+    }
+    const size_t num_colors{descriptions.size()};
+    const VkAttachmentReference* depth_attachment{};
+    if (key.depth_format != PixelFormat::Invalid) {
+        depth_attachment = &ATTACHMENT_REFERENCES[num_colors];
+        descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
+    }
+    const VkSubpassDescription subpass{
+        .flags = 0,
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+        .inputAttachmentCount = 0,
+        .pInputAttachments = nullptr,
+        .colorAttachmentCount = static_cast<u32>(num_colors),
+        .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
+        .pResolveAttachments = nullptr,
+        .pDepthStencilAttachment = depth_attachment,
+        .preserveAttachmentCount = 0,
+        .pPreserveAttachments = nullptr,
+    };
+    pair->second = device->GetLogical().CreateRenderPass({
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .attachmentCount = static_cast<u32>(descriptions.size()),
+        .pAttachments = descriptions.data(),
+        .subpassCount = 1,
+        .pSubpasses = &subpass,
+        .dependencyCount = 0,
+        .pDependencies = nullptr,
+    });
+    return *pair->second;
+}
+
+} // namespace Vulkan
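
Because identical RenderPassKey values now return the same cached VkRenderPass handle, framebuffers and graphics pipelines built from the same attachment formats automatically agree on a compatible render pass. A hypothetical call site (the PixelFormat enumerator names are assumptions):

    VkRenderPass MakeExamplePass(Vulkan::RenderPassCache& render_pass_cache) {
        Vulkan::RenderPassKey key{};
        key.color_formats.fill(VideoCore::Surface::PixelFormat::Invalid);
        key.color_formats[0] = VideoCore::Surface::PixelFormat::B8G8R8A8_UNORM;
        key.depth_format = VideoCore::Surface::PixelFormat::D32_FLOAT;
        key.samples = VK_SAMPLE_COUNT_1_BIT;
        return render_pass_cache.Get(key); // cached after the first call
    }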
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
new file mode 100644
index 0000000000..db8e83f1aa
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+struct RenderPassKey {
+    auto operator<=>(const RenderPassKey&) const noexcept = default;
+
+    std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
+    VideoCore::Surface::PixelFormat depth_format;
+    VkSampleCountFlagBits samples;
+};
+
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::RenderPassKey> {
+    [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
+        size_t value = static_cast<size_t>(key.depth_format) << 48;
+        value ^= static_cast<size_t>(key.samples) << 52;
+        for (size_t i = 0; i < key.color_formats.size(); ++i) {
+            value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
+        }
+        return value;
+    }
+};
+} // namespace std
+
+namespace Vulkan {
+
+class Device;
+
+class RenderPassCache {
+public:
+    explicit RenderPassCache(const Device& device_);
+
+    VkRenderPass Get(const RenderPassKey& key);
+
+private:
+    const Device* device{};
+    std::unordered_map<RenderPassKey, vk::RenderPass> cache;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 88ccf96f51..1bbc542a1c 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -18,6 +18,7 @@
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
 using VideoCore::Surface::IsPixelFormatASTC;
 
 namespace {
-
-constexpr std::array ATTACHMENT_REFERENCES{
-    VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
-};
-
 constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     if (color == std::array<float, 4>{0, 0, 0, 0}) {
         return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
@@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     }
 }
 
-[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
-                                                            const ImageView* image_view) {
-    using MaxwellToVK::SurfaceFormat;
-    const PixelFormat pixel_format = image_view->format;
-    return VkAttachmentDescription{
-        .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
-        .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
-        .samples = image_view->Samples(),
-        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
-        .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-        .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-    };
-}
-
 [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
     switch (swizzle) {
     case SwizzleSource::Zero:
@@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
 
 Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
                          ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
-    std::vector<VkAttachmentDescription> descriptions;
     std::vector<VkImageView> attachments;
     RenderPassKey renderpass_key{};
     s32 num_layers = 1;
@@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
             renderpass_key.color_formats[index] = PixelFormat::Invalid;
             continue;
         }
-        descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
         attachments.push_back(color_buffer->RenderTarget());
         renderpass_key.color_formats[index] = color_buffer->format;
         num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
         ++num_images;
     }
     const size_t num_colors = attachments.size();
-    const VkAttachmentReference* depth_attachment =
-        depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
     if (depth_buffer) {
-        descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
         attachments.push_back(depth_buffer->RenderTarget());
         renderpass_key.depth_format = depth_buffer->format;
         num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
     }
     renderpass_key.samples = samples;
 
-    const auto& device = runtime.device.GetLogical();
-    const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
-    if (is_new) {
-        const VkSubpassDescription subpass{
-            .flags = 0,
-            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-            .inputAttachmentCount = 0,
-            .pInputAttachments = nullptr,
-            .colorAttachmentCount = static_cast<u32>(num_colors),
-            .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
-            .pResolveAttachments = nullptr,
-            .pDepthStencilAttachment = depth_attachment,
-            .preserveAttachmentCount = 0,
-            .pPreserveAttachments = nullptr,
-        };
-        cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
-            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-            .pNext = nullptr,
-            .flags = 0,
-            .attachmentCount = static_cast<u32>(descriptions.size()),
-            .pAttachments = descriptions.data(),
-            .subpassCount = 1,
-            .pSubpasses = &subpass,
-            .dependencyCount = 0,
-            .pDependencies = nullptr,
-        });
-    }
-    renderpass = *cache_pair->second;
+    renderpass = runtime.render_pass_cache.Get(renderpass_key);
+
     render_area = VkExtent2D{
         .width = key.size.width,
         .height = key.size.height,
     };
     num_color_buffers = static_cast<u32>(num_colors);
-    framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
+    framebuffer = runtime.device.GetLogical().CreateFramebuffer({
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 172bcdf98f..189ee5a68e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -26,35 +26,10 @@ class Device;
 class Image;
 class ImageView;
 class Framebuffer;
+class RenderPassCache;
 class StagingBufferPool;
 class VKScheduler;
 
-struct RenderPassKey {
-    constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
-
-    std::array<PixelFormat, NUM_RT> color_formats;
-    PixelFormat depth_format;
-    VkSampleCountFlagBits samples;
-};
-
-} // namespace Vulkan
-
-namespace std {
-template <>
-struct hash<Vulkan::RenderPassKey> {
-    [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
-        size_t value = static_cast<size_t>(key.depth_format) << 48;
-        value ^= static_cast<size_t>(key.samples) << 52;
-        for (size_t i = 0; i < key.color_formats.size(); ++i) {
-            value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
-        }
-        return value;
-    }
-};
-} // namespace std
-
-namespace Vulkan {
-
 struct TextureCacheRuntime {
     const Device& device;
     VKScheduler& scheduler;
@@ -62,7 +37,7 @@ struct TextureCacheRuntime {
     StagingBufferPool& staging_buffer_pool;
     BlitImageHelper& blit_image_helper;
     ASTCDecoderPass& astc_decoder_pass;
-    std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{};
+    RenderPassCache& render_pass_cache;
 
     void Finish();
 
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 4887d6fd9a..f0e5b098c0 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
     VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
     VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
     VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
+    VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
 #ifdef _WIN32
     VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
 #endif
@@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     };
     SetNext(next, host_query_reset);
 
+    VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
+        .pNext = nullptr,
+        .shaderDemoteToHelperInvocation = true,
+    };
+    SetNext(next, demote);
+
     VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
     if (is_float16_supported) {
         float16_int8 = {
@@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const {
             throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
         }
     }
+    VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
+    demote.sType =
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
+    demote.pNext = nullptr;
+
     VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
     robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
+    robustness2.pNext = &demote;
 
     VkPhysicalDeviceFeatures2KHR features2{};
     features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const {
         std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
         std::make_pair(features.shaderStorageImageWriteWithoutFormat,
                        "shaderStorageImageWriteWithoutFormat"),
+        std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
         std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
         std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
         std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
-- 
cgit v1.2.3-70-g09d2
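
The CheckSuitability change above follows the standard Vulkan feature-query pattern: chain the extension's feature struct behind VkPhysicalDeviceFeatures2 via pNext and let a single query fill the whole chain. A standalone sketch of just the demote-to-helper query:

    #include <vulkan/vulkan.h>

    bool SupportsDemoteToHelper(VkPhysicalDevice physical) {
        VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
        demote.sType =
            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;

        VkPhysicalDeviceFeatures2 features2{};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features2.pNext = &demote;

        // The driver walks the pNext chain and fills every recognized struct.
        vkGetPhysicalDeviceFeatures2(physical, &features2);
        return demote.shaderDemoteToHelperInvocation == VK_TRUE;
    }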


From 68a9505d8a1d00c6ba2739bc0af3069cf87b9b84 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 24 Mar 2021 01:33:45 -0300
Subject: shader: Implement NDC [-1, 1], attribute types and default varying
 initialization

---
 src/shader_recompiler/CMakeLists.txt               |  1 +
 .../backend/spirv/emit_context.cpp                 | 35 +++++++----
 src/shader_recompiler/backend/spirv/emit_context.h | 10 +++-
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  2 +
 .../backend/spirv/emit_spirv_context_get_set.cpp   | 69 ++++++++++++++--------
 .../backend/spirv/emit_spirv_special.cpp           | 35 +++++++++++
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  8 +++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  3 +
 .../frontend/ir/microinstruction.cpp               |  2 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |  4 ++
 .../frontend/maxwell/structured_control_flow.cpp   |  7 ++-
 src/shader_recompiler/profile.h                    | 13 ++++
 .../renderer_vulkan/vk_graphics_pipeline.cpp       |  3 +
 .../renderer_vulkan/vk_pipeline_cache.cpp          | 33 ++++++++++-
 src/video_core/renderer_vulkan/vk_pipeline_cache.h |  4 +-
 15 files changed, 186 insertions(+), 43 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_special.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 086bdf8d00..028e8b2d27 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_logical.cpp
     backend/spirv/emit_spirv_memory.cpp
     backend/spirv/emit_spirv_select.cpp
+    backend/spirv/emit_spirv_special.cpp
     backend/spirv/emit_spirv_undefined.cpp
     backend/spirv/emit_spirv_vote.cpp
     environment.h
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 36f130781c..ea46af2443 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -67,6 +67,18 @@ Id DefineInput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin =
 Id DefineOutput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin = std::nullopt) {
     return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
 }
+
+Id GetAttributeType(EmitContext& ctx, AttributeType type) {
+    switch (type) {
+    case AttributeType::Float:
+        return ctx.F32[4];
+    case AttributeType::SignedInt:
+        return ctx.TypeVector(ctx.TypeInt(32, true), 4);
+    case AttributeType::UnsignedInt:
+        return ctx.U32[4];
+    }
+    throw InvalidArgument("Invalid attribute type {}", type);
+}
 } // Anonymous namespace
 
 void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -82,11 +94,11 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
 }
 
 EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
-    : Sirit::Module(0x00010000), profile{profile_} {
+    : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} {
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
-    DefineInterfaces(program.info, program.stage);
+    DefineInterfaces(program.info);
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
     DefineTextures(program.info, binding);
@@ -130,6 +142,9 @@ void EmitContext::DefineCommonTypes(const Info& info) {
     U32.Define(*this, TypeInt(32, false), "u32");
 
     input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
+    input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
+    input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
+
     output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
 
     if (info.uses_int8) {
@@ -162,9 +177,9 @@ void EmitContext::DefineCommonConstants() {
     u32_zero_value = Constant(U32[1], 0U);
 }
 
-void EmitContext::DefineInterfaces(const Info& info, Stage stage) {
-    DefineInputs(info, stage);
-    DefineOutputs(info, stage);
+void EmitContext::DefineInterfaces(const Info& info) {
+    DefineInputs(info);
+    DefineOutputs(info);
 }
 
 void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
@@ -252,7 +267,7 @@ void EmitContext::DefineLabels(IR::Program& program) {
     }
 }
 
-void EmitContext::DefineInputs(const Info& info, Stage stage) {
+void EmitContext::DefineInputs(const Info& info) {
     if (info.uses_workgroup_id) {
         workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId);
     }
@@ -288,8 +303,8 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) {
         if (!info.loads_generics[index]) {
             continue;
         }
-        // FIXME: Declare size from input
-        const Id id{DefineInput(*this, F32[4])};
+        const Id type{GetAttributeType(*this, profile.generic_input_types[index])};
+        const Id id{DefineInput(*this, type)};
         Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
         Name(id, fmt::format("in_attr{}", index));
         input_generics[index] = id;
@@ -323,8 +338,8 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions:
     }
 }
 
-void EmitContext::DefineOutputs(const Info& info, Stage stage) {
-    if (info.stores_position) {
+void EmitContext::DefineOutputs(const Info& info) {
+    if (info.stores_position || stage == Stage::VertexB) {
         output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position);
     }
     for (size_t i = 0; i < info.stores_generics.size(); ++i) {
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 6e64360bf8..5ed815c065 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -52,6 +52,7 @@ public:
     [[nodiscard]] Id Def(const IR::Value& value);
 
     const Profile& profile;
+    Stage stage{};
 
     Id void_id{};
     Id U1{};
@@ -72,6 +73,9 @@ public:
     UniformDefinitions uniform_types;
 
     Id input_f32{};
+    Id input_u32{};
+    Id input_s32{};
+
     Id output_f32{};
 
     Id storage_u32{};
@@ -104,7 +108,7 @@ public:
 private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
-    void DefineInterfaces(const Info& info, Stage stage);
+    void DefineInterfaces(const Info& info);
     void DefineConstantBuffers(const Info& info, u32& binding);
     void DefineStorageBuffers(const Info& info, u32& binding);
     void DefineTextures(const Info& info, u32& binding);
@@ -113,8 +117,8 @@ private:
     void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding,
                                Id type, char type_char, u32 element_size);
 
-    void DefineInputs(const Info& info, Stage stage);
-    void DefineOutputs(const Info& info, Stage stage);
+    void DefineInputs(const Info& info);
+    void DefineOutputs(const Info& info);
 };
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index ce23200f28..7fefcf2f28 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -28,6 +28,8 @@ void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
 void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
 void EmitReturn(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
 void EmitGetRegister(EmitContext& ctx);
 void EmitSetRegister(EmitContext& ctx);
 void EmitGetPred(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 052b84151a..8fc040f8b1 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -2,30 +2,26 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <tuple>
+
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
-Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
-    const u32 element{static_cast<u32>(attr) % 4};
-    const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
-    if (IR::IsGeneric(attr)) {
-        const u32 index{IR::GenericAttributeIndex(attr)};
-        return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id());
-    }
-    switch (attr) {
-    case IR::Attribute::PositionX:
-    case IR::Attribute::PositionY:
-    case IR::Attribute::PositionZ:
-    case IR::Attribute::PositionW:
-        return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id());
-    case IR::Attribute::InstanceId:
-        return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
-    case IR::Attribute::VertexId:
-        return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
-    default:
-        throw NotImplementedException("Read attribute {}", attr);
+std::tuple<Id, Id, bool> AttrTypes(EmitContext& ctx, u32 index) {
+    const AttributeType type{ctx.profile.generic_input_types.at(index)};
+    switch (type) {
+    case AttributeType::Float:
+        return {ctx.input_f32, ctx.F32[1], false};
+    case AttributeType::UnsignedInt:
+        return {ctx.input_u32, ctx.U32[1], true};
+    case AttributeType::SignedInt:
+        return {ctx.input_s32, ctx.TypeInt(32, true), true};
     }
+    throw InvalidArgument("Invalid attribute type {}", type);
 }
 
 Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
@@ -129,19 +125,40 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 }
 
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
-    if (!ctx.profile.support_vertex_instance_id) {
-        switch (attr) {
-        case IR::Attribute::InstanceId:
+    const u32 element{static_cast<u32>(attr) % 4};
+    const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
+    if (IR::IsGeneric(attr)) {
+        const u32 index{IR::GenericAttributeIndex(attr)};
+        const auto [pointer_type, type, needs_cast]{AttrTypes(ctx, index)};
+        const Id generic_id{ctx.input_generics.at(index)};
+        const Id pointer{ctx.OpAccessChain(pointer_type, generic_id, element_id())};
+        const Id value{ctx.OpLoad(type, pointer)};
+        return needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
+    }
+    switch (attr) {
+    case IR::Attribute::PositionX:
+    case IR::Attribute::PositionY:
+    case IR::Attribute::PositionZ:
+    case IR::Attribute::PositionW:
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()));
+    case IR::Attribute::InstanceId:
+        if (ctx.profile.support_vertex_instance_id) {
+            return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
+        } else {
             return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_index),
                               ctx.OpLoad(ctx.U32[1], ctx.base_instance));
-        case IR::Attribute::VertexId:
+        }
+    case IR::Attribute::VertexId:
+        if (ctx.profile.support_vertex_instance_id) {
+            return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
+        } else {
             return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index),
                               ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
-        default:
-            break;
         }
+    default:
+        throw NotImplementedException("Read attribute {}", attr);
     }
-    return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr));
 }
 
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
new file mode 100644
index 0000000000..70ae7b51ee
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitPrologue(EmitContext& ctx) {
+    if (ctx.stage == Stage::VertexB) {
+        const Id zero{ctx.Constant(ctx.F32[1], 0.0f)};
+        const Id one{ctx.Constant(ctx.F32[1], 1.0f)};
+        const Id null_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, zero)};
+        ctx.OpStore(ctx.output_position, ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one));
+        for (const Id generic_id : ctx.output_generics) {
+            if (Sirit::ValidId(generic_id)) {
+                ctx.OpStore(generic_id, null_vector);
+            }
+        }
+    }
+}
+
+void EmitEpilogue(EmitContext& ctx) {
+    if (ctx.profile.convert_depth_mode) {
+        const Id type{ctx.F32[1]};
+        const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)};
+        const Id z{ctx.OpCompositeExtract(type, position, 2u)};
+        const Id w{ctx.OpCompositeExtract(type, position, 3u)};
+        const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))};
+        const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)};
+        ctx.OpStore(ctx.output_position, vector);
+    }
+}
+
+} // namespace Shader::Backend::SPIRV
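
The epilogue's (z + w) * 0.5 is the usual OpenGL-to-Vulkan depth-range conversion. When convert_depth_mode is set, the guest emits clip coordinates whose visible depth spans -w <= z <= w, while Vulkan expects 0 <= z' <= w. Substituting

    z' = (z + w) / 2   which gives   z' / w = (z / w + 1) / 2

maps NDC depth -1 to 0 and +1 to 1 after the perspective divide, leaving x, y and w untouched; that is exactly the OpFAdd/OpFMul/OpCompositeInsert sequence emitted above.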
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index ff29701253..ce610799a7 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -92,6 +92,14 @@ void IREmitter::DemoteToHelperInvocation(Block* continue_label) {
     Inst(Opcode::DemoteToHelperInvocation, continue_label);
 }
 
+void IREmitter::Prologue() {
+    Inst(Opcode::Prologue);
+}
+
+void IREmitter::Epilogue() {
+    Inst(Opcode::Epilogue);
+}
+
 U32 IREmitter::GetReg(IR::Reg reg) {
     return Inst<U32>(Opcode::GetRegister, reg);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 1708be3efc..39109b0ded 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -39,6 +39,9 @@ public:
     void Return();
     void DemoteToHelperInvocation(Block* continue_label);
 
+    void Prologue();
+    void Epilogue();
+
     [[nodiscard]] U32 GetReg(IR::Reg reg);
     void SetReg(IR::Reg reg, const U32& value);
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 21b7d8a9f4..ba39680564 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -56,6 +56,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::SelectionMerge:
     case Opcode::Return:
     case Opcode::DemoteToHelperInvocation:
+    case Opcode::Prologue:
+    case Opcode::Epilogue:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
     case Opcode::SetFragColor:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index fe888b8b2f..8945c7b04c 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -15,6 +15,10 @@ OPCODE(SelectionMerge,                                      Void,           Labe
 OPCODE(Return,                                              Void,                                                                                           )
 OPCODE(DemoteToHelperInvocation,                            Void,           Label,                                                                          )
 
+// Special operations
+OPCODE(Prologue,                                            Void,                                                                                           )
+OPCODE(Epilogue,                                            Void,                                                                                           )
+
 // Context getters/setters
 OPCODE(GetRegister,                                         U32,            Reg,                                                                            )
 OPCODE(SetRegister,                                         Void,           Reg,            U32,                                                            )
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index cec03e73ee..fdac1c95a7 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -634,6 +634,9 @@ public:
         : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
           block_list{block_list_} {
         Visit(root_stmt, nullptr, nullptr);
+
+        IR::IREmitter ir{*block_list.front()};
+        ir.Prologue();
     }
 
 private:
@@ -734,7 +737,9 @@ private:
                     current_block = block_pool.Create(inst_pool);
                     block_list.push_back(current_block);
                 }
-                IR::IREmitter{*current_block}.Return();
+                IR::IREmitter ir{*current_block};
+                ir.Epilogue();
+                ir.Return();
                 current_block = nullptr;
                 break;
             }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index b57cbc3105..41550bfc63 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -4,8 +4,18 @@
 
 #pragma once
 
+#include <array>
+
+#include "common/common_types.h"
+
 namespace Shader {
 
+enum class AttributeType : u8 {
+    Float,
+    SignedInt,
+    UnsignedInt,
+};
+
 struct Profile {
     bool unified_descriptor_binding{};
     bool support_vertex_instance_id{};
@@ -24,6 +34,9 @@ struct Profile {
 
     // FClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
+
+    std::array<AttributeType, 32> generic_input_types{};
+    bool convert_depth_mode{};
 };
 
 } // namespace Shader
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index a2ec418b12..a87ed1976a 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -181,6 +181,9 @@ void GraphicsPipeline::Configure(bool is_indexed) {
         PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(),
                              *texture_cache, *update_descriptor_queue, index);
     }
+    if (!descriptor_set_layout) {
+        return;
+    }
     const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
     update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index bdbc8dd1e7..504b8c9d65 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -437,7 +437,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
       buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {
     const auto& float_control{device.FloatControlProperties()};
     const VkDriverIdKHR driver_id{device.GetDriverID()};
-    profile = Shader::Profile{
+    base_profile = Shader::Profile{
         .unified_descriptor_binding = true,
         .support_vertex_instance_id = false,
         .support_float_controls = true,
@@ -458,6 +458,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
         .support_vote = true,
         .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
+        .generic_input_types{},
     };
 }
 
@@ -589,6 +590,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools,
         Shader::Environment& env{*envs[env_index]};
         ++env_index;
 
+        const Shader::Profile profile{MakeProfile(key, env.ShaderStage())};
         const std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
         modules[stage_index] = BuildShader(device, code);
     }
@@ -645,9 +647,36 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools,
     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
     Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
     u32 binding{0};
-    std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
+    std::vector<u32> code{EmitSPIRV(base_profile, env, program, binding)};
     return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info,
                            BuildShader(device, code)};
 }
 
+static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
+    switch (attr.Type()) {
+    case Maxwell::VertexAttribute::Type::SignedNorm:
+    case Maxwell::VertexAttribute::Type::UnsignedNorm:
+    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+    case Maxwell::VertexAttribute::Type::SignedScaled:
+    case Maxwell::VertexAttribute::Type::Float:
+        return Shader::AttributeType::Float;
+    case Maxwell::VertexAttribute::Type::SignedInt:
+        return Shader::AttributeType::SignedInt;
+    case Maxwell::VertexAttribute::Type::UnsignedInt:
+        return Shader::AttributeType::UnsignedInt;
+    }
+    return Shader::AttributeType::Float;
+}
+
+Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
+                                           Shader::Stage stage) {
+    Shader::Profile profile{base_profile};
+    if (stage == Shader::Stage::VertexB) {
+        profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0;
+        std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(),
+                               &CastAttributeType);
+    }
+    return profile;
+}
+
 } // namespace Vulkan
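
MakeProfile clones base_profile and specializes it from the graphics pipeline key; only the last vertex stage (VertexB) consumes the fixed-function vertex state, so the depth convention and attribute types are applied there and every other stage keeps the base profile. One consequence: a vertex shader is now compiled once per vertex format rather than once per guest program. A standalone sketch of the transform pattern, using a stand-in Attribute type instead of FixedPipelineState::VertexAttribute:

    #include <algorithm>
    #include <array>

    #include "shader_recompiler/profile.h"

    // Stand-in for FixedPipelineState::VertexAttribute, for illustration only.
    struct Attribute {
        Shader::AttributeType type{};
        Shader::AttributeType Type() const { return type; }
    };

    std::array<Shader::AttributeType, 32> Specialize(const std::array<Attribute, 32>& attributes) {
        std::array<Shader::AttributeType, 32> types{};
        std::ranges::transform(attributes, types.begin(),
                               [](const Attribute& attr) { return attr.Type(); });
        return types;
    }
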
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index d481f56f9b..e09d78063a 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -156,6 +156,8 @@ private:
     ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key,
                                           Shader::Environment& env) const;
 
+    Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage);
+
     Tegra::GPU& gpu;
     Tegra::Engines::Maxwell3D& maxwell3d;
     Tegra::Engines::KeplerCompute& kepler_compute;
@@ -176,7 +178,7 @@ private:
 
     ShaderPools main_pools;
 
-    Shader::Profile profile;
+    Shader::Profile base_profile;
     std::string pipeline_cache_filename;
 };
 
-- 
cgit v1.2.3-70-g09d2


From 32c5483beb2f79f5d55eb2906f2bfdfa1698bca3 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Thu, 25 Mar 2021 11:31:37 -0400
Subject: shader: Implement SHFL

---
 src/shader_recompiler/CMakeLists.txt               |   3 +-
 .../backend/spirv/emit_context.cpp                 |   3 +-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |   6 +-
 src/shader_recompiler/backend/spirv/emit_spirv.h   |   9 ++
 .../backend/spirv/emit_spirv_vote.cpp              |  58 ---------
 .../backend/spirv/emit_spirv_warp.cpp              | 135 +++++++++++++++++++++
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  23 ++++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  12 +-
 .../frontend/ir/microinstruction.cpp               |  12 ++
 .../frontend/ir/microinstruction.h                 |   1 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |   7 +-
 .../maxwell/translate/impl/integer_scaled_add.cpp  |   4 +-
 .../maxwell/translate/impl/not_implemented.cpp     |   4 -
 .../maxwell/translate/impl/warp_shuffle.cpp        |  69 +++++++++++
 .../ir_opt/collect_shader_info_pass.cpp            |   6 +
 src/shader_recompiler/shader_info.h                |   1 +
 16 files changed, 284 insertions(+), 69 deletions(-)
 delete mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 028e8b2d27..4161783c83 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -16,7 +16,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_select.cpp
     backend/spirv/emit_spirv_special.cpp
     backend/spirv/emit_spirv_undefined.cpp
-    backend/spirv/emit_spirv_vote.cpp
+    backend/spirv/emit_spirv_warp.cpp
     environment.h
     exception.h
     file_environment.cpp
@@ -125,6 +125,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/texture_fetch.cpp
     frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
     frontend/maxwell/translate/impl/vote.cpp
+    frontend/maxwell/translate/impl/warp_shuffle.cpp
     frontend/maxwell/translate/translate.cpp
     frontend/maxwell/translate/translate.h
     ir_opt/collect_shader_info_pass.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index ea46af2443..5db4a9082c 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -274,7 +274,8 @@ void EmitContext::DefineInputs(const Info& info) {
     if (info.uses_local_invocation_id) {
         local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId);
     }
-    if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) {
+    if (info.uses_subgroup_invocation_id ||
+        (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) {
         subgroup_local_invocation_id =
             DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 1074039120..cee72f50df 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -224,7 +224,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
         ctx.AddExtension("SPV_KHR_shader_draw_parameters");
         ctx.AddCapability(spv::Capability::DrawParameters);
     }
-    if (info.uses_subgroup_vote && profile.support_vote) {
+    if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) {
         ctx.AddExtension("SPV_KHR_shader_ballot");
         ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
         if (!profile.warp_size_potentially_larger_than_guest) {
@@ -315,4 +315,8 @@ void EmitGetSparseFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
 }
 
+void EmitGetInBoundsFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 6d4adafc71..a233a48170 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -158,6 +158,7 @@ void EmitGetSignFromOp(EmitContext& ctx);
 void EmitGetCarryFromOp(EmitContext& ctx);
 void EmitGetOverflowFromOp(EmitContext& ctx);
 void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
 Id EmitFPAbs16(EmitContext& ctx, Id value);
 Id EmitFPAbs32(EmitContext& ctx, Id value);
 Id EmitFPAbs64(EmitContext& ctx, Id value);
@@ -355,5 +356,13 @@ Id EmitVoteAll(EmitContext& ctx, Id pred);
 Id EmitVoteAny(EmitContext& ctx, Id pred);
 Id EmitVoteEqual(EmitContext& ctx, Id pred);
 Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
+Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                    Id segmentation_mask);
+Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                 Id segmentation_mask);
+Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                   Id segmentation_mask);
+Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                        Id segmentation_mask);
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp
deleted file mode 100644
index a63677ef2c..0000000000
--- a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "shader_recompiler/backend/spirv/emit_spirv.h"
-
-namespace Shader::Backend::SPIRV {
-namespace {
-Id LargeWarpBallot(EmitContext& ctx, Id ballot) {
-    const Id shift{ctx.Constant(ctx.U32[1], 5)};
-    const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
-    return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index);
-}
-} // Anonymous namespace
-
-Id EmitVoteAll(EmitContext& ctx, Id pred) {
-    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
-        return ctx.OpSubgroupAllKHR(ctx.U1, pred);
-    }
-    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
-    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
-    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
-    const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
-    return ctx.OpIEqual(ctx.U1, lhs, active_mask);
-}
-
-Id EmitVoteAny(EmitContext& ctx, Id pred) {
-    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
-        return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
-    }
-    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
-    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
-    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
-    const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
-    return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
-}
-
-Id EmitVoteEqual(EmitContext& ctx, Id pred) {
-    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
-        return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
-    }
-    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
-    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
-    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
-    const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
-    return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
-                           ctx.OpIEqual(ctx.U1, lhs, active_mask));
-}
-
-Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
-    const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
-    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
-        return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
-    }
-    return LargeWarpBallot(ctx, ballot);
-}
-
-} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
new file mode 100644
index 0000000000..44d8a347fc
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -0,0 +1,135 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id LargeWarpBallot(EmitContext& ctx, Id ballot) {
+    // Each ballot word holds 32 lanes; select the word with this thread's bit.
+    const Id shift{ctx.Constant(ctx.U32[1], 5)};
+    const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id element_index{ctx.OpShiftRightLogical(ctx.U32[1], local_index, shift)};
+    return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, element_index);
+}
+
+void SetInBoundsFlag(IR::Inst* inst, Id result) {
+    IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+    if (!in_bounds) {
+        return;
+    }
+    in_bounds->SetDefinition(result);
+    in_bounds->Invalidate();
+}
+
+Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) {
+    return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask);
+}
+
+Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) {
+    return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id,
+                           ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask));
+}
+
+Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) {
+    const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
+    const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
+    return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask);
+}
+
+Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
+    return ctx.OpSelect(ctx.U32[1], in_range,
+                        ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
+}
+} // Anonymous namespace
+
+Id EmitVoteAll(EmitContext& ctx, Id pred) {
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        return ctx.OpSubgroupAllKHR(ctx.U1, pred);
+    }
+    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
+    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+    const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
+    return ctx.OpIEqual(ctx.U1, lhs, active_mask);
+}
+
+Id EmitVoteAny(EmitContext& ctx, Id pred) {
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
+    }
+    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
+    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+    const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
+    return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
+}
+
+Id EmitVoteEqual(EmitContext& ctx, Id pred) {
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
+    }
+    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
+    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+    const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
+    return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
+                           ctx.OpIEqual(ctx.U1, lhs, active_mask));
+}
+
+Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
+    const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
+    }
+    return LargeWarpBallot(ctx, ballot);
+}
+
+Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                    Id segmentation_mask) {
+    const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
+    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
+    const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
+
+    const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
+    const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
+    const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+    SetInBoundsFlag(inst, in_range);
+    return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                 Id segmentation_mask) {
+    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+    const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
+    const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+    SetInBoundsFlag(inst, in_range);
+    return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                   Id segmentation_mask) {
+    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+    const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
+    const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+    SetInBoundsFlag(inst, in_range);
+    return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                        Id segmentation_mask) {
+    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+    const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
+    const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+    SetInBoundsFlag(inst, in_range);
+    return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+} // namespace Shader::Backend::SPIRV
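
The bounds math mirrors how SHFL partitions a warp: min_thread_id (thread_id & segmentation_mask) is the first lane of the caller's segment, and max_thread_id ORs in the clamp bits outside the mask to get the last lane a shuffle may read. Each mode then derives the source lane differently (IDX rebases the index into the segment, UP subtracts, DOWN adds, BFLY XORs) and compares it against the bound (>= for UP, <= otherwise). A host-side simulation of the ShuffleDown case, assuming plain 32-bit lane indices:

    #include <cstdint>

    struct ShuffleResult {
        uint32_t src_lane;
        bool in_bounds;
    };

    // Mirrors EmitShuffleDown: compute the segment's last readable lane, then
    // read `index` lanes below (numerically above) the caller.
    ShuffleResult ShuffleDownSim(uint32_t tid, uint32_t index, uint32_t clamp,
                                 uint32_t seg_mask) {
        const uint32_t min_tid = tid & seg_mask;                // segment base
        const uint32_t max_tid = min_tid | (clamp & ~seg_mask); // last readable lane
        const uint32_t src = tid + index;
        return {src, src <= max_tid};
    }
    // Example: tid=5, index=2, clamp=31, seg_mask=0 -> src_lane=7, in_bounds=true.
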
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 6280c08f65..418b7f5aca 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -374,6 +374,10 @@ U1 IREmitter::GetSparseFromOp(const Value& op) {
     return Inst<U1>(Opcode::GetSparseFromOp, op);
 }
 
+U1 IREmitter::GetInBoundsFromOp(const Value& op) {
+    return Inst<U1>(Opcode::GetInBoundsFromOp, op);
+}
+
 F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
     if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
@@ -1486,4 +1490,23 @@ U32 IREmitter::SubgroupBallot(const U1& value) {
     return Inst<U32>(Opcode::SubgroupBallot, value);
 }
 
+U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+                            const IR::U32& seg_mask) {
+    return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+                         const IR::U32& seg_mask) {
+    return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+                           const IR::U32& seg_mask) {
+    return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+                                const IR::U32& seg_mask) {
+    return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
+}
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index ebbda78a9c..64738735e6 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -104,6 +104,7 @@ public:
     [[nodiscard]] U1 GetCarryFromOp(const Value& op);
     [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
     [[nodiscard]] U1 GetSparseFromOp(const Value& op);
+    [[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
 
     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
@@ -147,7 +148,8 @@ public:
     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
     [[nodiscard]] F32 FPSqrt(const F32& value);
     [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
-    [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value);
+    [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value,
+                                    const F16F32F64& max_value);
     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
     [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
@@ -242,6 +244,14 @@ public:
     [[nodiscard]] U1 VoteAny(const U1& value);
     [[nodiscard]] U1 VoteEqual(const U1& value);
     [[nodiscard]] U32 SubgroupBallot(const U1& value);
+    [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+                                   const IR::U32& seg_mask);
+    [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+                                const IR::U32& seg_mask);
+    [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+                                  const IR::U32& seg_mask);
+    [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
+                                       const IR::U32& clamp, const IR::U32& seg_mask);
 
 private:
     IR::Block::iterator insertion_point;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index ba39680564..be8eb4d4cf 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -89,6 +89,7 @@ bool Inst::IsPseudoInstruction() const noexcept {
     case Opcode::GetCarryFromOp:
     case Opcode::GetOverflowFromOp:
     case Opcode::GetSparseFromOp:
+    case Opcode::GetInBoundsFromOp:
         return true;
     default:
         return false;
@@ -123,6 +124,9 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
     case Opcode::GetSparseFromOp:
         CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
         return associated_insts->sparse_inst;
+    case Opcode::GetInBoundsFromOp:
+        CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
+        return associated_insts->in_bounds_inst;
     default:
         throw InvalidArgument("{} is not a pseudo-instruction", opcode);
     }
@@ -262,6 +266,10 @@ void Inst::Use(const Value& value) {
         AllocAssociatedInsts(assoc_inst);
         SetPseudoInstruction(assoc_inst->sparse_inst, this);
         break;
+    case Opcode::GetInBoundsFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->in_bounds_inst, this);
+        break;
     default:
         break;
     }
@@ -289,6 +297,10 @@ void Inst::UndoUse(const Value& value) {
         AllocAssociatedInsts(assoc_inst);
         RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
         break;
+    case Opcode::GetInBoundsFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp);
+        break;
     default:
         break;
     }
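
Like GetSparseFromOp, the new pseudo-instruction is bookkeeping rather than real code: using it as an argument registers it in the producer's AssociatedInsts (Use), dropping the argument unregisters it (UndoUse), and the SPIR-V backend later folds it into the shuffle emission through SetInBoundsFlag. The frontend-side pattern, as the SHFL translator uses it:

    // The shuffle result carries an implicit in-bounds flag; exposing it as a
    // pseudo-instruction lets the backend compute it alongside the shuffle.
    const IR::U32 result{ir.ShuffleDown(value, index, clamp, seg_mask)};
    ir.SetPred(pred, ir.GetInBoundsFromOp(result));
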
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index d5336c4383..770bbd5506 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -134,6 +134,7 @@ static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
 
 struct AssociatedInsts {
     union {
+        Inst* in_bounds_inst;
         Inst* sparse_inst;
         Inst* zero_inst{};
     };
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index dd17212a1b..a2479c46ac 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -159,6 +159,7 @@ OPCODE(GetSignFromOp,                                       U1,             Opaq
 OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                                         )
 OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                                         )
 OPCODE(GetSparseFromOp,                                     U1,             Opaque,                                                                         )
+OPCODE(GetInBoundsFromOp,                                   U1,             Opaque,                                                                         )
 
 // Floating-point operations
 OPCODE(FPAbs16,                                             F16,            F16,                                                                            )
@@ -363,8 +364,12 @@ OPCODE(ImageSampleExplicitLod,                              F32x4,          U32,
 OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
 OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
 
-// Vote operations
+// Warp operations
 OPCODE(VoteAll,                                             U1,             U1,                                                                             )
 OPCODE(VoteAny,                                             U1,             U1,                                                                             )
 OPCODE(VoteEqual,                                           U1,             U1,                                                                             )
 OPCODE(SubgroupBallot,                                      U32,            U1,                                                                             )
+OPCODE(ShuffleIndex,                                        U32,            U32,            U32,            U32,            U32,                            )
+OPCODE(ShuffleUp,                                           U32,            U32,            U32,            U32,            U32,                            )
+OPCODE(ShuffleDown,                                         U32,            U32,            U32,            U32,            U32,                            )
+OPCODE(ShuffleButterfly,                                    U32,            U32,            U32,            U32,            U32,                            )
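
Each OPCODE row is an X-macro entry: the result type comes first, followed by up to five argument types, so all four shuffles take (value, index, clamp, segmentation mask) and produce a U32. A minimal sketch of how such a table is typically expanded (the actual consumer macros live in opcodes.h, which this patch does not touch):

    // Sketch of the X-macro expansion pattern, assuming the usual consumer.
    enum class Opcode {
    #define OPCODE(name, result_type, ...) name,
    #include "opcodes.inc"
    #undef OPCODE
    };
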
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
index 5469e445ae..42fd42bb1a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -53,8 +53,8 @@ void TranslatorVisitor::ISCADD_reg(u64 insn) {
     ISCADD(*this, insn, GetReg20(insn));
 }
 
-void TranslatorVisitor::ISCADD_cbuf(u64) {
-    throw NotImplementedException("ISCADD (cbuf)");
+void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
+    ISCADD(*this, insn, GetCbuf(insn));
 }
 
 void TranslatorVisitor::ISCADD_imm(u64 insn) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index c0e36a7e27..3ccd7b9253 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -301,10 +301,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) {
     ThrowNotImplemented(Opcode::SETLMEMBASE);
 }
 
-void TranslatorVisitor::SHFL(u64) {
-    ThrowNotImplemented(Opcode::SHFL);
-}
-
 void TranslatorVisitor::SSY() {
     // SSY is a no-op
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 0000000000..550fed55cb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class ShuffleMode : u64 {
+    IDX,
+    UP,
+    DOWN,
+    BFLY,
+};
+
+[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
+                                       const IR::U32& index, const IR::U32& mask,
+                                       ShuffleMode shfl_op) {
+    const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
+    const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
+    switch (shfl_op) {
+    case ShuffleMode::IDX:
+        return ir.ShuffleIndex(value, index, clamp, seg_mask);
+    case ShuffleMode::UP:
+        return ir.ShuffleUp(value, index, clamp, seg_mask);
+    case ShuffleMode::DOWN:
+        return ir.ShuffleDown(value, index, clamp, seg_mask);
+    case ShuffleMode::BFLY:
+        return ir.ShuffleButterfly(value, index, clamp, seg_mask);
+    default:
+        throw NotImplementedException("Invalid SHFL op {}", shfl_op);
+    }
+}
+
+void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
+    union {
+        u64 insn;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_reg;
+        BitField<30, 2, ShuffleMode> mode;
+        BitField<48, 3, IR::Pred> pred;
+    } const shfl{insn};
+
+    const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
+    v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
+    v.X(shfl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHFL(u64 insn) {
+    union {
+        u64 insn;
+        BitField<20, 5, u64> src_a_imm;
+        BitField<28, 1, u64> src_a_flag;
+        BitField<29, 1, u64> src_b_flag;
+        BitField<34, 13, u64> src_b_imm;
+    } const flags{insn};
+    const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
+                                              : GetReg20(insn)};
+    const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
+                                              : GetReg39(insn)};
+    Shuffle(*this, insn, src_a, src_b);
+}
+
+} // namespace Shader::Maxwell
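
The packed c operand carries both bounds fields: bits [4:0] hold the clamp (the highest readable lane offset) and bits [12:8] hold the segmentation mask, which is exactly what the two BitFieldExtract calls above pull out before dispatching on the mode. A host-side check of that decoding:

    #include <cstdint>

    struct ShflBounds {
        uint32_t clamp;
        uint32_t seg_mask;
    };

    // Same fields as BitFieldExtract(mask, 0, 5) and BitFieldExtract(mask, 8, 5).
    constexpr ShflBounds DecodeShflMask(uint32_t mask) {
        return {mask & 0x1fu, (mask >> 8) & 0x1fu};
    }

    static_assert(DecodeShflMask(0x1f1fu).clamp == 0x1fu);
    static_assert(DecodeShflMask(0x1f1fu).seg_mask == 0x1fu);
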
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 32f276f3bb..61cc314c7c 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -307,6 +307,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::LocalInvocationId:
         info.uses_local_invocation_id = true;
         break;
+    case IR::Opcode::ShuffleIndex:
+    case IR::Opcode::ShuffleUp:
+    case IR::Opcode::ShuffleDown:
+    case IR::Opcode::ShuffleButterfly:
+        info.uses_subgroup_invocation_id = true;
+        break;
     case IR::Opcode::GetCbufU8:
     case IR::Opcode::GetCbufS8:
     case IR::Opcode::GetCbufU16:
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 3d9f04d1ad..27e61a5f96 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -56,6 +56,7 @@ struct Info {
 
     bool uses_workgroup_id{};
     bool uses_local_invocation_id{};
+    bool uses_subgroup_invocation_id{};
 
     std::array<bool, 32> loads_generics{};
     bool loads_position{};
-- 
cgit v1.2.3-70-g09d2


From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 28 Mar 2021 19:53:34 -0300
Subject: shader: Implement LDS, STS, LDL, and STL and use SPIR-V 1.4 when
 available

---
 src/shader_recompiler/CMakeLists.txt               |   2 +
 .../backend/spirv/emit_context.cpp                 | 115 +++++++++++-
 src/shader_recompiler/backend/spirv/emit_context.h |  21 +++
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  14 ++
 .../backend/spirv/emit_spirv_context_get_set.cpp   |  10 ++
 .../backend/spirv/emit_spirv_shared_memory.cpp     | 175 ++++++++++++++++++
 src/shader_recompiler/environment.h                |   4 +
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  46 +++++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |   6 +
 .../frontend/ir/microinstruction.cpp               |   6 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |  18 ++
 src/shader_recompiler/frontend/ir/program.h        |   2 +
 src/shader_recompiler/frontend/maxwell/program.cpp |   2 +
 .../translate/impl/load_store_local_shared.cpp     | 197 +++++++++++++++++++++
 .../maxwell/translate/impl/not_implemented.cpp     |  16 --
 .../ir_opt/collect_shader_info_pass.cpp            |   6 +
 src/shader_recompiler/profile.h                    |   3 +
 .../renderer_vulkan/vk_pipeline_cache.cpp          |  47 ++++-
 src/video_core/vulkan_common/vulkan_device.cpp     |  34 ++++
 src/video_core/vulkan_common/vulkan_device.h       |  42 +++--
 20 files changed, 730 insertions(+), 36 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 55b846c845..003cbefb14 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_logical.cpp
     backend/spirv/emit_spirv_memory.cpp
     backend/spirv/emit_spirv_select.cpp
+    backend/spirv/emit_spirv_shared_memory.cpp
     backend/spirv/emit_spirv_special.cpp
     backend/spirv/emit_spirv_undefined.cpp
     backend/spirv/emit_spirv_warp.cpp
@@ -111,6 +112,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/load_constant.cpp
     frontend/maxwell/translate/impl/load_effective_address.cpp
     frontend/maxwell/translate/impl/load_store_attribute.cpp
+    frontend/maxwell/translate/impl/load_store_local_shared.cpp
     frontend/maxwell/translate/impl/load_store_memory.cpp
     frontend/maxwell/translate/impl/logic_operation.cpp
     frontend/maxwell/translate/impl/logic_operation_three_input.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index a8ca33c1db..96d0e9b4d2 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -9,6 +9,7 @@
 #include <fmt/format.h>
 
 #include "common/common_types.h"
+#include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/emit_context.h"
 
 namespace Shader::Backend::SPIRV {
@@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
 }
 
 EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
-    : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} {
+    : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} {
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
     DefineInterfaces(program.info);
+    DefineLocalMemory(program);
+    DefineSharedMemory(program);
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
     DefineTextures(program.info, binding);
@@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) {
     F32.Define(*this, TypeFloat(32), "f32");
     U32.Define(*this, TypeInt(32, false), "u32");
 
+    private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
+
     input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
     input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
     input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
@@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) {
     DefineOutputs(info);
 }
 
+void EmitContext::DefineLocalMemory(const IR::Program& program) {
+    if (program.local_memory_size == 0) {
+        return;
+    }
+    const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
+    const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
+    const Id pointer{TypePointer(spv::StorageClass::Private, type)};
+    local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
+    if (profile.supported_spirv >= 0x00010400) {
+        interfaces.push_back(local_memory);
+    }
+}
+
+void EmitContext::DefineSharedMemory(const IR::Program& program) {
+    if (program.shared_memory_size == 0) {
+        return;
+    }
+    const auto make{[&](Id element_type, u32 element_size) {
+        const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
+        const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))};
+        Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+        const Id struct_type{TypeStruct(array_type)};
+        MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
+        Decorate(struct_type, spv::Decoration::Block);
+
+        const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
+        const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
+        const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
+        Decorate(variable, spv::Decoration::Aliased);
+        interfaces.push_back(variable);
+
+        return std::make_pair(variable, element_pointer);
+    }};
+    if (profile.support_explicit_workgroup_layout) {
+        AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
+        AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
+        if (program.info.uses_int8) {
+            AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
+            std::tie(shared_memory_u8, shared_u8) = make(U8, 1);
+        }
+        if (program.info.uses_int16) {
+            AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
+            std::tie(shared_memory_u16, shared_u16) = make(U16, 2);
+        }
+        std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4);
+        std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8);
+        std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16);
+        return;
+    }
+    const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
+    const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
+    const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)};
+    shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
+    shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup);
+    interfaces.push_back(shared_memory_u32);
+
+    const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
+    const auto make_function{[&](u32 mask, u32 size) {
+        const Id loop_header{OpLabel()};
+        const Id continue_block{OpLabel()};
+        const Id merge_block{OpLabel()};
+
+        const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
+        const Id offset{OpFunctionParameter(U32[1])};
+        const Id insert_value{OpFunctionParameter(U32[1])};
+        AddLabel();
+        OpBranch(loop_header);
+
+        AddLabel(loop_header);
+        const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))};
+        const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))};
+        const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))};
+        const Id count{Constant(U32[1], size)};
+        OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
+        OpBranch(continue_block);
+
+        AddLabel(continue_block);
+        const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
+        const Id old_value{OpLoad(U32[1], word_pointer)};
+        const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
+        const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U),
+                                                    u32_zero_value, u32_zero_value, new_value,
+                                                    old_value)};
+        const Id success{OpIEqual(U1, atomic_res, old_value)};
+        OpBranchConditional(success, merge_block, loop_header);
+
+        AddLabel(merge_block);
+        OpReturn();
+        OpFunctionEnd();
+        return func;
+    }};
+    if (program.info.uses_int8) {
+        shared_store_u8_func = make_function(24, 8);
+    }
+    if (program.info.uses_int16) {
+        shared_store_u16_func = make_function(16, 16);
+    }
+}
+
 void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
     if (info.constant_buffer_descriptors.empty()) {
         return;
@@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
         Decorate(id, spv::Decoration::Binding, binding);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("ssbo{}", index));
+        if (profile.supported_spirv >= 0x00010400) {
+            interfaces.push_back(id);
+        }
         std::fill_n(ssbos.data() + index, desc.count, id);
         index += desc.count;
         binding += desc.count;
@@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
                 .image_type{image_type},
             });
         }
+        if (profile.supported_spirv >= 0x00010400) {
+            interfaces.push_back(id);
+        }
         binding += desc.count;
     }
 }
@@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions:
         for (size_t i = 0; i < desc.count; ++i) {
             cbufs[desc.index + i].*member_type = id;
         }
+        if (profile.supported_spirv >= 0x00010400) {
+            interfaces.push_back(id);
+        }
         binding += desc.count;
     }
 }
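
When SPV_KHR_workgroup_memory_explicit_layout is unavailable, shared memory is declared as a single u32 array, so byte and halfword stores must be emulated: make_function above emits a loop that loads the containing word, bit-field-inserts the new value, and retries with OpAtomicCompareExchange until no other invocation raced the same word. The same idea in host C++ with std::atomic:

    #include <atomic>
    #include <cstdint>

    // Host equivalent of the emitted shared_store_u8 helper: splice a byte
    // into its containing 32-bit word and CAS until the word is unchanged.
    void SharedStoreU8(std::atomic<uint32_t>* words, uint32_t offset, uint32_t value) {
        std::atomic<uint32_t>& word = words[offset >> 2];
        const uint32_t bit = (offset * 8) & 24; // bit offset of the byte in its word
        const uint32_t mask = 0xffu << bit;
        uint32_t old_value = word.load();
        uint32_t new_value;
        do {
            new_value = (old_value & ~mask) | ((value & 0xffu) << bit);
        } while (!word.compare_exchange_weak(old_value, new_value));
    }
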
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 01b7b665d6..1a4e8221aa 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -73,6 +73,14 @@ public:
 
     UniformDefinitions uniform_types;
 
+    Id private_u32{};
+
+    Id shared_u8{};
+    Id shared_u16{};
+    Id shared_u32{};
+    Id shared_u32x2{};
+    Id shared_u32x4{};
+
     Id input_f32{};
     Id input_u32{};
     Id input_s32{};
@@ -96,6 +104,17 @@ public:
     Id base_vertex{};
     Id front_face{};
 
+    Id local_memory{};
+
+    Id shared_memory_u8{};
+    Id shared_memory_u16{};
+    Id shared_memory_u32{};
+    Id shared_memory_u32x2{};
+    Id shared_memory_u32x4{};
+
+    Id shared_store_u8_func{};
+    Id shared_store_u16_func{};
+
     Id input_position{};
     std::array<Id, 32> input_generics{};
 
@@ -111,6 +130,8 @@ private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
     void DefineInterfaces(const Info& info);
+    void DefineLocalMemory(const IR::Program& program);
+    void DefineSharedMemory(const IR::Program& program);
     void DefineConstantBuffers(const Info& info, u32& binding);
     void DefineStorageBuffers(const Info& info, u32& binding);
     void DefineTextures(const Info& info, u32& binding);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 837f0e858e..4f62af959f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx);
 void EmitSetOFlag(EmitContext& ctx);
 Id EmitWorkgroupId(EmitContext& ctx);
 Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
 Id EmitUndefU1(EmitContext& ctx);
 Id EmitUndefU8(EmitContext& ctx);
 Id EmitUndefU16(EmitContext& ctx);
@@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
                         Id value);
 void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value);
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
 Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
 Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
 Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 4cbc2aec10..52dcef8a42 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) {
     return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
 }
 
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
+    const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+    return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
+    const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+    ctx.OpStore(pointer, value);
+}
+
 } // namespace Shader::Backend::SPIRV
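
Local memory is addressed in 32-bit words here: the IR passes a word offset directly, so byte addresses are expected to be shifted down by two (and sub-word accesses masked) before reaching these emitters. A trivial host-side model:

    #include <cstdint>
    #include <vector>

    // Storage is an array of 32-bit words and offsets are already in words,
    // so the accessors reduce to a single indexed load or store.
    uint32_t LoadLocal(const std::vector<uint32_t>& local, uint32_t word_offset) {
        return local[word_offset];
    }

    void WriteLocal(std::vector<uint32_t>& local, uint32_t word_offset, uint32_t value) {
        local[word_offset] = value;
    }
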
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
new file mode 100644
index 0000000000..fa2fc9ab4b
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,175 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
+    const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
+}
+
+Id Word(EmitContext& ctx, Id offset) {
+    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+    return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
+    const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))};
+    const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))};
+    const Id count_id{ctx.Constant(ctx.U32[1], count)};
+    return {bit, count_id};
+}
+} // Anonymous namespace
+
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+        return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+        return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+        return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+        return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+        return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+        return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
+        return ctx.OpLoad(ctx.U32[1], pointer);
+    } else {
+        return Word(ctx, offset);
+    }
+}
+
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+        return ctx.OpLoad(ctx.U32[2], pointer);
+    } else {
+        const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+        const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+        const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))};
+        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
+        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
+        return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
+                                        ctx.OpLoad(ctx.U32[1], rhs_pointer));
+    }
+}
+
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+        return ctx.OpLoad(ctx.U32[4], pointer);
+    }
+    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+    const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    std::array<Id, 4> values{};
+    for (u32 i = 0; i < 4; ++i) {
+        const Id index{i == 0 ? base_index
+                              : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
+        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+        values[i] = ctx.OpLoad(ctx.U32[1], pointer);
+    }
+    return ctx.OpCompositeConstruct(ctx.U32[4], values);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+        ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
+    } else {
+        ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
+    }
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+        ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
+    } else {
+        ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
+    }
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
+    Id pointer{};
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
+    } else {
+        const Id shift{ctx.Constant(ctx.U32[1], 2U)};
+        const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+        pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
+    }
+    ctx.OpStore(pointer, value);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+        ctx.OpStore(pointer, value);
+        return;
+    }
+    const Id shift{ctx.Constant(ctx.U32[1], 2U)};
+    const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+    const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))};
+    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
+    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
+    ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
+    ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+        ctx.OpStore(pointer, value);
+        return;
+    }
+    const Id shift{ctx.Constant(ctx.U32[1], 2U)};
+    const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+    for (u32 i = 0; i < 4; ++i) {
+        const Id index{i == 0 ? base_index
+                              : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
+        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+        ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+    }
+}
+
+} // namespace Shader::Backend::SPIRV
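
On the fallback path, sub-word loads reuse the same word-plus-bitfield scheme as the stores: Word fetches the aligned 32-bit word, ExtractArgs computes the bit offset ((offset * 8) masked to stay inside the word) and the field width, and a signed or unsigned bit-field extract produces the byte or halfword. The unsigned byte load, mirrored on the host:

    #include <cstdint>

    // Mirrors EmitLoadSharedU8 without explicit workgroup layout support:
    // word = shared[offset >> 2]; bit = (offset * 8) & 24; take 8 bits.
    uint32_t LoadSharedU8(const uint32_t* shared, uint32_t offset) {
        const uint32_t word = shared[offset >> 2];
        const uint32_t bit = (offset * 8) & 24;
        return (word >> bit) & 0xffu;
    }
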
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0c62c1c54f..9415d02f66 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -19,6 +19,10 @@ public:
 
     [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
 
+    [[nodiscard]] virtual u32 LocalMemorySize() const = 0;
+
+    [[nodiscard]] virtual u32 SharedMemorySize() const = 0;
+
     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
 
     [[nodiscard]] const ProgramHeader& SPH() const noexcept {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 6d41442eea..d6a1d8ec20 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
     Inst(Opcode::WriteGlobal128, address, vector);
 }
 
+U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
+    return Inst<U32>(Opcode::LoadLocal, word_offset);
+}
+
+void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
+    Inst(Opcode::WriteLocal, word_offset, value);
+}
+
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
+    switch (bit_size) {
+    case 8:
+        return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
+    case 16:
+        return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
+    case 32:
+        return Inst(Opcode::LoadSharedU32, offset);
+    case 64:
+        return Inst(Opcode::LoadSharedU64, offset);
+    case 128:
+        return Inst(Opcode::LoadSharedU128, offset);
+    }
+    throw InvalidArgument("Invalid bit size {}", bit_size);
+}
+
+void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
+    switch (bit_size) {
+    case 8:
+        Inst(Opcode::WriteSharedU8, offset, value);
+        break;
+    case 16:
+        Inst(Opcode::WriteSharedU16, offset, value);
+        break;
+    case 32:
+        Inst(Opcode::WriteSharedU32, offset, value);
+        break;
+    case 64:
+        Inst(Opcode::WriteSharedU64, offset, value);
+        break;
+    case 128:
+        Inst(Opcode::WriteSharedU128, offset, value);
+        break;
+    default:
+        throw InvalidArgument("Invalid bit size {}", bit_size);
+    }
+}
+
 U1 IREmitter::GetZeroFromOp(const Value& op) {
     return Inst<U1>(Opcode::GetZeroFromOp, op);
 }
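
The LoadShared dispatch above encodes the widening behaviour of narrow shared
loads: 8- and 16-bit loads return a 32-bit result, sign- or zero-extended,
which is why separate signed and unsigned opcodes exist only at those widths.
A standalone sketch of that semantics (plain integers stand in for IR values;
names are illustrative):

    #include <cstdint>
    #include <cstring>
    #include <stdexcept>

    // 8/16-bit shared loads widen to 32 bits, signed or unsigned.
    std::uint32_t LoadSharedNarrow(const std::uint8_t* smem, int bit_size,
                                   bool is_signed, std::uint32_t offset) {
        switch (bit_size) {
        case 8: {
            const std::uint8_t raw = smem[offset];
            return is_signed
                       ? static_cast<std::uint32_t>(static_cast<std::int8_t>(raw))
                       : raw;
        }
        case 16: {
            std::uint16_t raw;
            std::memcpy(&raw, smem + offset, sizeof(raw)); // unaligned-safe read
            return is_signed
                       ? static_cast<std::uint32_t>(static_cast<std::int16_t>(raw))
                       : raw;
        }
        default:
            throw std::invalid_argument("Invalid bit size");
        }
    }
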
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 8d50aa6078..842c2bdafb 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -99,6 +99,12 @@ public:
     void WriteGlobal64(const U64& address, const IR::Value& vector);
     void WriteGlobal128(const U64& address, const IR::Value& vector);
 
+    [[nodiscard]] U32 LoadLocal(const U32& word_offset);
+    void WriteLocal(const U32& word_offset, const U32& value);
+
+    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
+    void WriteShared(int bit_size, const U32& offset, const Value& value);
+
     [[nodiscard]] U1 GetZeroFromOp(const Value& op);
     [[nodiscard]] U1 GetSignFromOp(const Value& op);
     [[nodiscard]] U1 GetCarryFromOp(const Value& op);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index be8eb4d4cf..52a5e50349 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::WriteStorage32:
     case Opcode::WriteStorage64:
     case Opcode::WriteStorage128:
+    case Opcode::WriteLocal:
+    case Opcode::WriteSharedU8:
+    case Opcode::WriteSharedU16:
+    case Opcode::WriteSharedU32:
+    case Opcode::WriteSharedU64:
+    case Opcode::WriteSharedU128:
         return true;
     default:
         return false;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 5d7462d762..c756583282 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -89,6 +89,24 @@ OPCODE(WriteStorage32,                                      Void,           U32,
 OPCODE(WriteStorage64,                                      Void,           U32,            U32,            U32x2,                                          )
 OPCODE(WriteStorage128,                                     Void,           U32,            U32,            U32x4,                                          )
 
+// Local memory operations
+OPCODE(LoadLocal,                                           U32,            U32,                                                                            )
+OPCODE(WriteLocal,                                          Void,           U32,            U32,                                                            )
+
+// Shared memory operations
+OPCODE(LoadSharedU8,                                        U32,            U32,                                                                            )
+OPCODE(LoadSharedS8,                                        U32,            U32,                                                                            )
+OPCODE(LoadSharedU16,                                       U32,            U32,                                                                            )
+OPCODE(LoadSharedS16,                                       U32,            U32,                                                                            )
+OPCODE(LoadSharedU32,                                       U32,            U32,                                                                            )
+OPCODE(LoadSharedU64,                                       U32x2,          U32,                                                                            )
+OPCODE(LoadSharedU128,                                      U32x4,          U32,                                                                            )
+OPCODE(WriteSharedU8,                                       Void,           U32,            U32,                                                            )
+OPCODE(WriteSharedU16,                                      Void,           U32,            U32,                                                            )
+OPCODE(WriteSharedU32,                                      Void,           U32,            U32,                                                            )
+OPCODE(WriteSharedU64,                                      Void,           U32,            U32x2,                                                          )
+OPCODE(WriteSharedU128,                                     Void,           U32,            U32x4,                                                          )
+
 // Vector utility
 OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                                            )
 OPCODE(CompositeConstructU32x3,                             U32x3,          U32,            U32,            U32,                                            )
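
opcodes.inc is an X-macro table: each consumer defines OPCODE to extract the
columns it needs (name, result type, argument types) and includes the file. A
reduced sketch of the pattern with a two-entry stand-in list, not the real
consumer (helper names here are illustrative):

    #include <cstddef>

    #define OPCODE_LIST(OPCODE)                                                \
        OPCODE(LoadSharedU32, U32, U32)                                        \
        OPCODE(WriteSharedU64, Void, U32, U32x2)

    // Pass 1: the enum of opcode names.
    enum class Opcode {
    #define OPCODE(name, ...) name,
        OPCODE_LIST(OPCODE)
    #undef OPCODE
    };

    // Give the bare type tokens something to expand to when counting.
    constexpr int Void{}, U32{}, U32x2{};

    template <typename... Args>
    constexpr std::size_t Count(Args...) {
        return sizeof...(Args);
    }

    // Pass 2: per-opcode argument count, the kind of table behind NumArgsOf.
    constexpr std::size_t NumArgsOf(Opcode op) {
        switch (op) {
    #define OPCODE(name, result, ...)                                          \
        case Opcode::name:                                                     \
            return Count(__VA_ARGS__);
            OPCODE_LIST(OPCODE)
    #undef OPCODE
        }
        return 0;
    }

    static_assert(NumArgsOf(Opcode::WriteSharedU64) == 2);
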
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index 0162e919cb..3a37b3ab91 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -21,6 +21,8 @@ struct Program {
     Info info;
     Stage stage{};
     std::array<u32, 3> workgroup_size{};
+    u32 local_memory_size{};
+    u32 shared_memory_size{};
 };
 
 [[nodiscard]] std::string DumpProgram(const Program& program);
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index a914a91f48..7b08f11b06 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
     program.post_order_blocks = PostOrder(program.blocks);
     program.stage = env.ShaderStage();
+    program.local_memory_size = env.LocalMemorySize();
     if (program.stage == Stage::Compute) {
         program.workgroup_size = env.WorkgroupSize();
+        program.shared_memory_size = env.SharedMemorySize();
     }
     RemoveUnreachableBlocks(program);
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 0000000000..68963c8ea6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,197 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+    U8,
+    S8,
+    U16,
+    S16,
+    B32,
+    B64,
+    B128,
+};
+
+IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> offset_reg;
+        BitField<20, 24, u64> absolute_offset;
+        BitField<20, 24, s64> relative_offset;
+    } const encoding{insn};
+
+    if (encoding.offset_reg == IR::Reg::RZ) {
+        return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
+    } else {
+        const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
+        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+    }
+}
+
+std::pair<int, bool> GetSize(u64 insn) {
+    union {
+        u64 raw;
+        BitField<48, 3, Size> size;
+    } const encoding{insn};
+
+    switch (encoding.size) {
+    case Size::U8:
+        return {8, false};
+    case Size::S8:
+        return {8, true};
+    case Size::U16:
+        return {16, false};
+    case Size::S16:
+        return {16, true};
+    case Size::B32:
+        return {32, false};
+    case Size::B64:
+        return {64, false};
+    case Size::B128:
+        return {128, false};
+    default:
+        throw NotImplementedException("Invalid size {}", encoding.size.Value());
+    }
+}
+
+IR::Reg Reg(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> reg;
+    } const encoding{insn};
+
+    return encoding.reg;
+}
+
+IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
+    return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
+}
+
+IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
+    return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDL(u64 insn) {
+    const IR::U32 offset{Offset(*this, insn)};
+    const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};
+
+    const IR::Reg dest{Reg(insn)};
+    const auto [bit_size, is_signed]{GetSize(insn)};
+    switch (bit_size) {
+    case 8: {
+        const IR::U32 bit{ByteOffset(ir, offset)};
+        X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed));
+        break;
+    }
+    case 16: {
+        const IR::U32 bit{ShortOffset(ir, offset)};
+        X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed));
+        break;
+    }
+    case 32:
+    case 64:
+    case 128:
+        if (!IR::IsAligned(dest, bit_size / 32)) {
+            throw NotImplementedException("Unaligned destination register {}", dest);
+        }
+        X(dest, ir.LoadLocal(word_offset));
+        for (int i = 1; i < bit_size / 32; ++i) {
+            X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i))));
+        }
+        break;
+    }
+}
+
+void TranslatorVisitor::LDS(u64 insn) {
+    const IR::U32 offset{Offset(*this, insn)};
+    const IR::Reg dest{Reg(insn)};
+    const auto [bit_size, is_signed]{GetSize(insn)};
+    const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
+    switch (bit_size) {
+    case 8:
+    case 16:
+    case 32:
+        X(dest, IR::U32{value});
+        break;
+    case 64:
+    case 128:
+        if (!IR::IsAligned(dest, bit_size / 32)) {
+            throw NotImplementedException("Unaligned destination register {}", dest);
+        }
+        for (int element = 0; element < bit_size / 32; ++element) {
+            X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
+        }
+        break;
+    }
+}
+
+void TranslatorVisitor::STL(u64 insn) {
+    const IR::U32 offset{Offset(*this, insn)};
+    const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};
+
+    const IR::Reg reg{Reg(insn)};
+    const IR::U32 src{X(reg)};
+    const int bit_size{GetSize(insn).first};
+    switch (bit_size) {
+    case 8: {
+        const IR::U32 bit{ByteOffset(ir, offset)};
+        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
+        ir.WriteLocal(word_offset, value);
+        break;
+    }
+    case 16: {
+        const IR::U32 bit{ShortOffset(ir, offset)};
+        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
+        ir.WriteLocal(word_offset, value);
+        break;
+    }
+    case 32:
+    case 64:
+    case 128:
+        if (!IR::IsAligned(reg, bit_size / 32)) {
+            throw NotImplementedException("Unaligned source register");
+        }
+        ir.WriteLocal(word_offset, src);
+        for (int i = 1; i < bit_size / 32; ++i) {
+            ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
+        }
+        break;
+    }
+}
+
+void TranslatorVisitor::STS(u64 insn) {
+    const IR::U32 offset{Offset(*this, insn)};
+    const IR::Reg reg{Reg(insn)};
+    const int bit_size{GetSize(insn).first};
+    switch (bit_size) {
+    case 8:
+    case 16:
+    case 32:
+        ir.WriteShared(bit_size, offset, X(reg));
+        break;
+    case 64:
+        if (!IR::IsAligned(reg, 2)) {
+            throw NotImplementedException("Unaligned source register {}", reg);
+        }
+        ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
+        break;
+    case 128: {
+        if (!IR::IsAligned(reg, 4)) {
+            throw NotImplementedException("Unaligned source register {}", reg);
+        }
+        const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
+        ir.WriteShared(128, offset, vector);
+        break;
+    }
+    }
+}
+
+} // namespace Shader::Maxwell
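
Local memory in these translations is word-addressed, so sub-word stores are
read-modify-write: LDL/STL find the containing word at offset >> 2 and the bit
position with ByteOffset/ShortOffset, i.e. (offset << 3) & 24 selects the byte
lane within the word. A standalone model of the 8-bit STL path (illustrative
names, not engine code):

    #include <cstdint>
    #include <vector>

    // 8-bit store into word-addressed local memory: read the word, insert
    // the byte at its lane, write it back. Mirrors the BitFieldInsert above.
    void StoreLocalU8(std::vector<std::uint32_t>& lmem, std::uint32_t offset,
                      std::uint8_t value) {
        const std::uint32_t word = offset >> 2;       // containing 32-bit word
        const std::uint32_t bit = (offset << 3) & 24; // 0, 8, 16 or 24
        const std::uint32_t mask = 0xffu << bit;
        lmem[word] = (lmem[word] & ~mask) | (std::uint32_t{value} << bit);
    }
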
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 4092166401..b62d8ee2aa 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) {
     ThrowNotImplemented(Opcode::LD);
 }
 
-void TranslatorVisitor::LDL(u64) {
-    ThrowNotImplemented(Opcode::LDL);
-}
-
-void TranslatorVisitor::LDS(u64) {
-    ThrowNotImplemented(Opcode::LDS);
-}
-
 void TranslatorVisitor::LEPC(u64) {
     ThrowNotImplemented(Opcode::LEPC);
 }
@@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) {
     ThrowNotImplemented(Opcode::ST);
 }
 
-void TranslatorVisitor::STL(u64) {
-    ThrowNotImplemented(Opcode::STL);
-}
-
 void TranslatorVisitor::STP(u64) {
     ThrowNotImplemented(Opcode::STP);
 }
 
-void TranslatorVisitor::STS(u64) {
-    ThrowNotImplemented(Opcode::STS);
-}
-
 void TranslatorVisitor::SUATOM_cas(u64) {
     ThrowNotImplemented(Opcode::SUATOM_cas);
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 60be672283..c932c307ba 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::LoadStorageS8:
     case IR::Opcode::WriteStorageU8:
     case IR::Opcode::WriteStorageS8:
+    case IR::Opcode::LoadSharedU8:
+    case IR::Opcode::LoadSharedS8:
+    case IR::Opcode::WriteSharedU8:
     case IR::Opcode::SelectU8:
     case IR::Opcode::ConvertF16S8:
     case IR::Opcode::ConvertF16U8:
@@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::LoadStorageS16:
     case IR::Opcode::WriteStorageU16:
     case IR::Opcode::WriteStorageS16:
+    case IR::Opcode::LoadSharedU16:
+    case IR::Opcode::LoadSharedS16:
+    case IR::Opcode::WriteSharedU16:
     case IR::Opcode::SelectU16:
     case IR::Opcode::BitCastU16F16:
     case IR::Opcode::BitCastF16U16:
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index e260477511..0276fc23be 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -18,6 +18,8 @@ enum class AttributeType : u8 {
 };
 
 struct Profile {
+    u32 supported_spirv{0x00010000};
+
     bool unified_descriptor_binding{};
     bool support_vertex_instance_id{};
     bool support_float_controls{};
@@ -30,6 +32,7 @@ struct Profile {
     bool support_fp16_signed_zero_nan_preserve{};
     bool support_fp32_signed_zero_nan_preserve{};
     bool support_fp64_signed_zero_nan_preserve{};
+    bool support_explicit_workgroup_layout{};
     bool support_vote{};
     bool warp_size_potentially_larger_than_guest{};
 
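
supported_spirv follows the SPIR-V header's version-word layout, with the
major version in bits 16..23 and the minor version in bits 8..15, so
0x00010000 is SPIR-V 1.0 and 0x00010400 is 1.4. As a quick check:

    #include <cstdint>

    // SPIR-V version word: 0x00MMmm00 (MM = major, mm = minor).
    constexpr std::uint32_t SpirvVersion(std::uint32_t major, std::uint32_t minor) {
        return (major << 16) | (minor << 8);
    }
    static_assert(SpirvVersion(1, 0) == 0x00010000);
    static_assert(SpirvVersion(1, 4) == 0x00010400);
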
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 69dd945b2d..0d6a32bfdd 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -114,10 +114,12 @@ public:
         gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size);
 
         const u64 num_texture_types{static_cast<u64>(texture_types.size())};
+        const u32 local_memory_size{LocalMemorySize()};
         const u32 texture_bound{TextureBoundBuffer()};
 
         file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
             .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
+            .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
             .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
             .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
             .write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest))
@@ -132,7 +134,10 @@ public:
         file.flush();
         if (stage == Shader::Stage::Compute) {
             const std::array<u32, 3> workgroup_size{WorkgroupSize()};
-            file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size));
+            const u32 shared_memory_size{SharedMemorySize()};
+            file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
+                .write(reinterpret_cast<const char*>(&shared_memory_size),
+                       sizeof(shared_memory_size));
         } else {
             file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
         }
@@ -278,6 +283,16 @@ public:
         return maxwell3d->regs.tex_cb_index;
     }
 
+    u32 LocalMemorySize() const override {
+        const u64 size{sph.LocalMemorySize()};
+        ASSERT(size <= std::numeric_limits<u32>::max());
+        return static_cast<u32>(size);
+    }
+
+    u32 SharedMemorySize() const override {
+        throw Shader::LogicError("Requesting shared memory size in graphics stage");
+    }
+
     std::array<u32, 3> WorkgroupSize() const override {
         throw Shader::LogicError("Requesting workgroup size in a graphics stage");
     }
@@ -313,6 +328,16 @@ public:
         return kepler_compute->regs.tex_cb_index;
     }
 
+    u32 LocalMemorySize() const override {
+        const auto& qmd{kepler_compute->launch_description};
+        return qmd.local_pos_alloc;
+    }
+
+    u32 SharedMemorySize() const override {
+        const auto& qmd{kepler_compute->launch_description};
+        return qmd.shared_alloc;
+    }
+
     std::array<u32, 3> WorkgroupSize() const override {
         const auto& qmd{kepler_compute->launch_description};
         return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
@@ -366,6 +391,7 @@ public:
         u64 num_texture_types{};
         file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
             .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
+            .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
             .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
             .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
             .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
@@ -381,7 +407,8 @@ public:
             texture_types.emplace(key, type);
         }
         if (stage == Shader::Stage::Compute) {
-            file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size));
+            file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
+                .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
         } else {
             file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
         }
@@ -402,6 +429,14 @@ public:
         return it->second;
     }
 
+    u32 LocalMemorySize() const override {
+        return local_memory_size;
+    }
+
+    u32 SharedMemorySize() const override {
+        return shared_memory_size;
+    }
+
     u32 TextureBoundBuffer() const override {
         return texture_bound;
     }
@@ -414,6 +449,8 @@ private:
     std::unique_ptr<u64[]> code;
     std::unordered_map<u64, Shader::TextureType> texture_types;
     std::array<u32, 3> workgroup_size{};
+    u32 local_memory_size{};
+    u32 shared_memory_size{};
     u32 texture_bound{};
     u32 read_lowest{};
     u32 read_highest{};
@@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
     const auto& float_control{device.FloatControlProperties()};
     const VkDriverIdKHR driver_id{device.GetDriverID()};
     base_profile = Shader::Profile{
+        .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
         .unified_descriptor_binding = true,
         .support_vertex_instance_id = false,
         .support_float_controls = true,
@@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
             float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
         .support_fp64_signed_zero_nan_preserve =
             float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
+        .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
         .support_vote = true,
         .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
@@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
         shader = MakeShaderInfo(env, *cpu_shader_addr);
     }
     const ComputePipelineCacheKey key{
-        .unique_hash = shader->unique_hash,
-        .shared_memory_size = qmd.shared_alloc,
+        .unique_hash{shader->unique_hash},
+        .shared_memory_size{qmd.shared_alloc},
         .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
     };
     const auto [pair, is_new]{compute_cache.try_emplace(key)};
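
Because the serialized record gains local_memory_size (and shared_memory_size
for compute stages), cache files written before this change no longer match the
new layout when read back. The fixed-width fields streamed above can be
pictured roughly as the following struct; the field types are assumptions for
illustration, and the variable-length code and texture-type payloads sit
between the header and the per-stage tail:

    #include <array>
    #include <cstdint>

    // Illustrative layout of one cached shader record (not a real type; the
    // code streams each field individually).
    struct CachedShaderRecord {
        std::uint64_t code_size;
        std::uint64_t num_texture_types;
        std::uint32_t local_memory_size; // new field
        std::uint32_t texture_bound;
        std::uint32_t start_address;
        std::uint32_t read_lowest;
        // ... code and texture-type payloads follow ...
        std::array<std::uint32_t, 3> workgroup_size; // compute stages only
        std::uint32_t shared_memory_size;            // new field, compute only
    };
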
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 009b74f121..c027598ba2 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -399,6 +399,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
     }
 
+    VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
+    if (khr_workgroup_memory_explicit_layout) {
+        workgroup_layout = {
+            .sType =
+                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
+            .pNext = nullptr,
+            .workgroupMemoryExplicitLayout = VK_TRUE,
+            .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
+            .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
+            .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
+        };
+        SetNext(next, workgroup_layout);
+    }
+
     if (!ext_depth_range_unrestricted) {
         LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
     }
@@ -662,6 +676,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
     }
 
     bool has_khr_shader_float16_int8{};
+    bool has_khr_workgroup_memory_explicit_layout{};
     bool has_ext_subgroup_size_control{};
     bool has_ext_transform_feedback{};
     bool has_ext_custom_border_color{};
@@ -682,6 +697,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
         test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
         test(khr_uniform_buffer_standard_layout,
              VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
+        test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
         test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
         test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
         test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
@@ -694,6 +710,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
         test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
         test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
         test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
+        test(has_khr_workgroup_memory_explicit_layout,
+             VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
         if (Settings::values.renderer_debug) {
             test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
                  true);
@@ -787,6 +805,22 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
             ext_extended_dynamic_state = true;
         }
     }
+    if (has_khr_workgroup_memory_explicit_layout) {
+        VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
+        layout.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
+        layout.pNext = nullptr;
+        features.pNext = &layout;
+        physical.GetFeatures2KHR(features);
+
+        if (layout.workgroupMemoryExplicitLayout &&
+            layout.workgroupMemoryExplicitLayout8BitAccess &&
+            layout.workgroupMemoryExplicitLayout16BitAccess &&
+            layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
+            extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
+            khr_workgroup_memory_explicit_layout = true;
+        }
+    }
     return extensions;
 }
 
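
The block above follows the usual two-phase Vulkan pattern: chain the feature
struct into a vkGetPhysicalDeviceFeatures2 query, then advertise the extension
only if every required member came back VK_TRUE. A reduced sketch of that
shape, assuming Vulkan headers that define the
VK_KHR_workgroup_memory_explicit_layout types:

    #include <vulkan/vulkan.h>

    // Query-then-enable: report support only when all four features are set.
    bool SupportsWorkgroupExplicitLayout(VkPhysicalDevice physical) {
        VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout{};
        layout.sType =
            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
        VkPhysicalDeviceFeatures2 features{};
        features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features.pNext = &layout;
        vkGetPhysicalDeviceFeatures2(physical, &features);
        return layout.workgroupMemoryExplicitLayout == VK_TRUE &&
               layout.workgroupMemoryExplicitLayout8BitAccess == VK_TRUE &&
               layout.workgroupMemoryExplicitLayout16BitAccess == VK_TRUE &&
               layout.workgroupMemoryExplicitLayoutScalarBlockLayout == VK_TRUE;
    }
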
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index c268a4f8dc..ac2311e7e1 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -168,11 +168,21 @@ public:
         return nv_viewport_swizzle;
     }
 
-    /// Returns true if the device supports VK_EXT_scalar_block_layout.
+    /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
     bool IsKhrUniformBufferStandardLayoutSupported() const {
         return khr_uniform_buffer_standard_layout;
     }
 
+    /// Returns true if the device supports VK_KHR_spirv_1_4.
+    bool IsKhrSpirv1_4Supported() const {
+        return khr_spirv_1_4;
+    }
+
+    /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
+    bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
+        return khr_workgroup_memory_explicit_layout;
+    }
+
     /// Returns true if the device supports VK_EXT_index_type_uint8.
     bool IsExtIndexTypeUint8Supported() const {
         return ext_index_type_uint8;
@@ -296,20 +306,22 @@ private:
     bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
     bool is_blit_depth_stencil_supported{};     ///< Support for blitting from and to depth stencil.
     bool nv_viewport_swizzle{};                 ///< Support for VK_NV_viewport_swizzle.
-    bool khr_uniform_buffer_standard_layout{};  ///< Support for std430 on UBOs.
-    bool ext_index_type_uint8{};                ///< Support for VK_EXT_index_type_uint8.
-    bool ext_sampler_filter_minmax{};           ///< Support for VK_EXT_sampler_filter_minmax.
-    bool ext_depth_range_unrestricted{};        ///< Support for VK_EXT_depth_range_unrestricted.
-    bool ext_shader_viewport_index_layer{};     ///< Support for VK_EXT_shader_viewport_index_layer.
-    bool ext_tooling_info{};                    ///< Support for VK_EXT_tooling_info.
-    bool ext_subgroup_size_control{};           ///< Support for VK_EXT_subgroup_size_control.
-    bool ext_transform_feedback{};              ///< Support for VK_EXT_transform_feedback.
-    bool ext_custom_border_color{};             ///< Support for VK_EXT_custom_border_color.
-    bool ext_extended_dynamic_state{};          ///< Support for VK_EXT_extended_dynamic_state.
-    bool ext_shader_stencil_export{};           ///< Support for VK_EXT_shader_stencil_export.
-    bool nv_device_diagnostics_config{};        ///< Support for VK_NV_device_diagnostics_config.
-    bool has_renderdoc{};                       ///< Has RenderDoc attached
-    bool has_nsight_graphics{};                 ///< Has Nsight Graphics attached
+    bool khr_uniform_buffer_standard_layout{};  ///< Support for std430 on UBOs.
+    bool khr_spirv_1_4{};                       ///< Support for VK_KHR_spirv_1_4.
+    bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
+    bool ext_index_type_uint8{};                 ///< Support for VK_EXT_index_type_uint8.
+    bool ext_sampler_filter_minmax{};            ///< Support for VK_EXT_sampler_filter_minmax.
+    bool ext_depth_range_unrestricted{};         ///< Support for VK_EXT_depth_range_unrestricted.
+    bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
+    bool ext_tooling_info{};                ///< Support for VK_EXT_tooling_info.
+    bool ext_subgroup_size_control{};       ///< Support for VK_EXT_subgroup_size_control.
+    bool ext_transform_feedback{};          ///< Support for VK_EXT_transform_feedback.
+    bool ext_custom_border_color{};         ///< Support for VK_EXT_custom_border_color.
+    bool ext_extended_dynamic_state{};      ///< Support for VK_EXT_extended_dynamic_state.
+    bool ext_shader_stencil_export{};       ///< Support for VK_EXT_shader_stencil_export.
+    bool nv_device_diagnostics_config{};    ///< Support for VK_NV_device_diagnostics_config.
+    bool has_renderdoc{};                   ///< Has RenderDoc attached
+    bool has_nsight_graphics{};             ///< Has Nsight Graphics attached
 
     // Telemetry parameters
     std::string vendor_name;                      ///< Device's driver name.
-- 
cgit v1.2.3-70-g09d2


From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001
From: FernandoS27 <fsahmkow27@gmail.com>
Date: Sat, 27 Mar 2021 22:30:24 +0100
Subject: shader: Implement BRX

---
 src/shader_recompiler/CMakeLists.txt               |   4 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |   3 +
 .../backend/spirv/emit_spirv_context_get_set.cpp   |  10 +-
 .../backend/spirv/emit_spirv_control_flow.cpp      |   4 +
 src/shader_recompiler/environment.h                |   2 +
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  12 +++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |   4 +
 .../frontend/ir/microinstruction.cpp               |   1 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |   3 +
 .../frontend/maxwell/control_flow.cpp              |  58 +++++++++--
 .../frontend/maxwell/control_flow.h                |   7 +-
 .../maxwell/indirect_branch_table_track.cpp        | 108 +++++++++++++++++++++
 .../frontend/maxwell/indirect_branch_table_track.h |  28 ++++++
 .../frontend/maxwell/instruction.h                 |   1 +
 .../frontend/maxwell/structured_control_flow.cpp   |  57 +++++++++++
 .../maxwell/translate/impl/branch_indirect.cpp     |  36 +++++++
 .../maxwell/translate/impl/load_constant.cpp       |  29 +-----
 .../maxwell/translate/impl/load_constant.h         |  39 ++++++++
 .../maxwell/translate/impl/not_implemented.cpp     |   8 --
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |  21 +++-
 .../renderer_vulkan/vk_pipeline_cache.cpp          |  50 +++++++++-
 21 files changed, 437 insertions(+), 48 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 003cbefb14..44ab929b79 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -52,6 +52,8 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/control_flow.h
     frontend/maxwell/decode.cpp
     frontend/maxwell/decode.h
+    frontend/maxwell/indirect_branch_table_track.cpp
+    frontend/maxwell/indirect_branch_table_track.h
     frontend/maxwell/instruction.h
     frontend/maxwell/location.h
     frontend/maxwell/maxwell.inc
@@ -63,6 +65,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/structured_control_flow.h
     frontend/maxwell/translate/impl/bitfield_extract.cpp
     frontend/maxwell/translate/impl/bitfield_insert.cpp
+    frontend/maxwell/translate/impl/branch_indirect.cpp
     frontend/maxwell/translate/impl/common_encoding.h
     frontend/maxwell/translate/impl/common_funcs.cpp
     frontend/maxwell/translate/impl/common_funcs.h
@@ -110,6 +113,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
     frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
     frontend/maxwell/translate/impl/load_constant.cpp
+    frontend/maxwell/translate/impl/load_constant.h
     frontend/maxwell/translate/impl/load_effective_address.cpp
     frontend/maxwell/translate/impl/load_store_attribute.cpp
     frontend/maxwell/translate/impl/load_store_local_shared.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 204c5f9e0e..02648d769c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -26,6 +26,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal
 void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
 void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
 void EmitReturn(EmitContext& ctx);
+void EmitUnreachable(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
 void EmitPrologue(EmitContext& ctx);
 void EmitEpilogue(EmitContext& ctx);
@@ -35,6 +36,8 @@ void EmitGetPred(EmitContext& ctx);
 void EmitSetPred(EmitContext& ctx);
 void EmitSetGotoVariable(EmitContext& ctx);
 void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
 Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 52dcef8a42..4a267b16c9 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -6,8 +6,6 @@
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
 
-#pragma optimize("", off)
-
 namespace Shader::Backend::SPIRV {
 namespace {
 struct AttrInfo {
@@ -74,6 +72,14 @@ void EmitGetGotoVariable(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
+void EmitSetIndirectBranchVariable(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitGetIndirectBranchVariable(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
 static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr,
                   u32 element_size, const IR::Value& binding, const IR::Value& offset) {
     if (!binding.IsImmediate()) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 6b81f01694..335603f881 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) {
     ctx.OpReturn();
 }
 
+void EmitUnreachable(EmitContext& ctx) {
+    ctx.OpUnreachable();
+}
+
 void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) {
     ctx.OpDemoteToHelperInvocationEXT();
     ctx.OpBranch(continue_label);
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 9415d02f66..1c50ae51e2 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -15,6 +15,8 @@ public:
 
     [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
 
+    [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0;
+
     [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0;
 
     [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 9b898e4e1d..5524724878 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -87,6 +87,10 @@ void IREmitter::Return() {
     Inst(Opcode::Return);
 }
 
+void IREmitter::Unreachable() {
+    Inst(Opcode::Unreachable);
+}
+
 void IREmitter::DemoteToHelperInvocation(Block* continue_label) {
     block->SetBranch(continue_label);
     continue_label->AddImmediatePredecessor(block);
@@ -126,6 +130,14 @@ void IREmitter::SetGotoVariable(u32 id, const U1& value) {
     Inst(Opcode::SetGotoVariable, id, value);
 }
 
+U32 IREmitter::GetIndirectBranchVariable() {
+    return Inst<U32>(Opcode::GetIndirectBranchVariable);
+}
+
+void IREmitter::SetIndirectBranchVariable(const U32& value) {
+    Inst(Opcode::SetIndirectBranchVariable, value);
+}
+
 void IREmitter::SetPred(IR::Pred pred, const U1& value) {
     Inst(Opcode::SetPred, pred, value);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 269f367a45..17bc32fc83 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -37,6 +37,7 @@ public:
     void LoopMerge(Block* merge_block, Block* continue_target);
     void SelectionMerge(Block* merge_block);
     void Return();
+    void Unreachable();
     void DemoteToHelperInvocation(Block* continue_label);
 
     void Prologue();
@@ -51,6 +52,9 @@ public:
     [[nodiscard]] U1 GetGotoVariable(u32 id);
     void SetGotoVariable(u32 id, const U1& value);
 
+    [[nodiscard]] U32 GetIndirectBranchVariable();
+    void SetIndirectBranchVariable(const U32& value);
+
     [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
     [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
                                bool is_signed);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 52a5e50349..c3ba6b5222 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -55,6 +55,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::LoopMerge:
     case Opcode::SelectionMerge:
     case Opcode::Return:
+    case Opcode::Unreachable:
     case Opcode::DemoteToHelperInvocation:
     case Opcode::Prologue:
     case Opcode::Epilogue:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 9b050995bd..fb79e3d8dc 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -13,6 +13,7 @@ OPCODE(BranchConditional,                                   Void,           U1,
 OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          )
 OPCODE(SelectionMerge,                                      Void,           Label,                                                                          )
 OPCODE(Return,                                              Void,                                                                                           )
+OPCODE(Unreachable,                                         Void,                                                                                           )
 OPCODE(DemoteToHelperInvocation,                            Void,           Label,                                                                          )
 
 // Special operations
@@ -26,6 +27,8 @@ OPCODE(GetPred,                                             U1,             Pred
 OPCODE(SetPred,                                             Void,           Pred,           U1,                                                             )
 OPCODE(GetGotoVariable,                                     U1,             U32,                                                                            )
 OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                                             )
+OPCODE(GetIndirectBranchVariable,                           U32,                                                                                            )
+OPCODE(SetIndirectBranchVariable,                           Void,           U32,                                                                            )
 OPCODE(GetCbufU8,                                           U32,            U32,            U32,                                                            )
 OPCODE(GetCbufS8,                                           U32,            U32,            U32,                                                            )
 OPCODE(GetCbufU16,                                          U32,            U32,            U32,                                                            )
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index 4f6707fae4..1e9b8e4260 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -14,6 +14,7 @@
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
 #include "shader_recompiler/frontend/maxwell/location.h"
 
 namespace Shader::Maxwell::Flow {
@@ -252,9 +253,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
     const Opcode opcode{Decode(inst.raw)};
     switch (opcode) {
     case Opcode::BRA:
-    case Opcode::BRX:
     case Opcode::JMP:
-    case Opcode::JMX:
     case Opcode::RET:
         if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
             return AnalysisState::Continue;
@@ -264,10 +263,6 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         case Opcode::JMP:
             AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
             break;
-        case Opcode::BRX:
-        case Opcode::JMX:
-            AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode));
-            break;
         case Opcode::RET:
             block->end_class = EndClass::Return;
             break;
@@ -302,6 +297,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
     case Opcode::SSY:
         block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
         return AnalysisState::Continue;
+    case Opcode::BRX:
+    case Opcode::JMX:
+        return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
     case Opcode::EXIT:
         return AnalyzeEXIT(block, function_id, pc, inst);
     case Opcode::PRET:
@@ -407,8 +405,46 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct
     block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
 }
 
-void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) {
-    throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX");
+CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
+                                   FunctionId function_id) {
+    const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)};
+    if (!brx_table) {
+        throw NotImplementedException("Failed to track indirect branch");
+    }
+    const IR::FlowTest flow_test{inst.branch.flow_test};
+    const Predicate pred{inst.Pred()};
+    if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
+        throw NotImplementedException("Conditional indirect branch");
+    }
+    std::vector<u32> targets;
+    targets.reserve(brx_table->num_entries);
+    for (u32 i = 0; i < brx_table->num_entries; ++i) {
+        u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
+        if (!is_absolute) {
+            target += pc.Offset();
+        }
+        target += brx_table->branch_offset;
+        target += 8;
+        targets.push_back(target);
+    }
+    std::ranges::sort(targets);
+    targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
+
+    block->indirect_branches.reserve(targets.size());
+    for (const u32 target : targets) {
+        Block* const branch{AddLabel(block, block->stack, target, function_id)};
+        block->indirect_branches.push_back(branch);
+    }
+    block->cond = IR::Condition{true};
+    block->end = pc + 1;
+    block->end_class = EndClass::IndirectBranch;
+    block->branch_reg = brx_table->branch_reg;
+    block->branch_offset = brx_table->branch_offset + 8;
+    if (!is_absolute) {
+        block->branch_offset += pc.Offset();
+    }
+    return AnalysisState::Branch;
 }
 
 CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
@@ -449,7 +485,6 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function
         // Block already exists and it has been visited
         return &*it;
     }
-    // TODO: FIX DANGLING BLOCKS
     Block* const new_block{block_pool.Create(Block{
         .begin{pc},
         .end{pc},
@@ -494,6 +529,11 @@ std::string CFG::Dot() const {
                     add_branch(block.branch_false, false);
                 }
                 break;
+            case EndClass::IndirectBranch:
+                for (Block* const branch : block.indirect_branches) {
+                    add_branch(branch, false);
+                }
+                break;
             case EndClass::Call:
                 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                 dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
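
AnalyzeBRX turns an indirect branch into a finite set of direct edges: it reads
num_entries words from the tracked constant buffer, biases each target by the
branch offset (plus the PC for relative branches) and the +8 instruction bias,
then sorts and deduplicates the targets. A standalone model of the target
gathering, with ReadCbuf standing in for Environment::ReadCbufValue:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    std::vector<std::uint32_t> GatherBrxTargets(
        std::uint32_t (*ReadCbuf)(std::uint32_t index, std::uint32_t offset),
        std::uint32_t cbuf_index, std::uint32_t cbuf_offset,
        std::uint32_t num_entries, std::int32_t branch_offset, std::uint32_t pc,
        bool is_absolute) {
        std::vector<std::uint32_t> targets;
        targets.reserve(num_entries);
        for (std::uint32_t i = 0; i < num_entries; ++i) {
            std::uint32_t target = ReadCbuf(cbuf_index, cbuf_offset + i * 4);
            if (!is_absolute) {
                target += pc; // relative branches are PC-based
            }
            target += static_cast<std::uint32_t>(branch_offset) + 8;
            targets.push_back(target);
        }
        std::sort(targets.begin(), targets.end());
        targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
        return targets;
    }
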
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
index 22f1341944..1e05fcb97c 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -26,6 +26,7 @@ using FunctionId = size_t;
 
 enum class EndClass {
     Branch,
+    IndirectBranch,
     Call,
     Exit,
     Return,
@@ -76,11 +77,14 @@ struct Block : boost::intrusive::set_base_hook<
     union {
         Block* branch_true;
         FunctionId function_call;
+        IR::Reg branch_reg;
     };
     union {
         Block* branch_false;
         Block* return_block;
+        s32 branch_offset;
     };
+    std::vector<Block*> indirect_branches;
 };
 
 struct Label {
@@ -139,7 +143,8 @@ private:
 
     void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
                     bool is_absolute);
-    void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute);
+    AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
+                             FunctionId function_id);
     AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);
 
     /// Return the branch target block id
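
branch_reg and branch_offset join the existing unions in Flow::Block, so
end_class is the discriminant that says which member is live. A minimal model
of that invariant (illustrative types, not the real struct):

    enum class EndClass { Branch, IndirectBranch };

    struct Node {
        EndClass end_class{};
        union {
            int branch_true_id = 0; // live when end_class == Branch
            int branch_reg;         // live when end_class == IndirectBranch
        };
    };

    int LiveMember(const Node& n) {
        return n.end_class == EndClass::Branch ? n.branch_true_id : n.branch_reg;
    }
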
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 0000000000..96453509d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
+
+namespace Shader::Maxwell {
+namespace {
+union Encoding {
+    u64 raw;
+    BitField<0, 8, IR::Reg> dest_reg;
+    BitField<8, 8, IR::Reg> src_reg;
+    BitField<20, 19, u64> immediate;
+    BitField<56, 1, u64> is_negative;
+    BitField<20, 24, s64> brx_offset;
+};
+
+template <typename Callable>
+std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
+    while (pos >= block_begin) {
+        const u64 insn{env.ReadInstruction(pos.Offset())};
+        --pos;
+        if (func(insn, Decode(insn))) {
+            return insn;
+        }
+    }
+    return std::nullopt;
+}
+
+std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
+                            IR::Reg brx_reg) {
+    return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
+        const LDC::Encoding ldc{insn};
+        return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
+               ldc.mode == LDC::Mode::Default;
+    });
+}
+
+std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
+                            IR::Reg ldc_reg) {
+    return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
+        const Encoding shl{insn};
+        return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
+    });
+}
+
+std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
+                              IR::Reg shl_reg) {
+    return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
+        const Encoding imnmx{insn};
+        return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
+    });
+}
+} // Anonymous namespace
+
+std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
+                                                                Location block_begin) {
+    const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
+    const Opcode brx_opcode{Decode(brx_insn)};
+    if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
+        throw LogicError("Tracked instruction is not BRX or JMX");
+    }
+    const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
+    const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};
+
+    Location pos{brx_pos};
+    const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
+    if (!ldc_insn) {
+        return std::nullopt;
+    }
+    const LDC::Encoding ldc{*ldc_insn};
+    const u32 cbuf_index{static_cast<u32>(ldc.index)};
+    const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
+    const IR::Reg ldc_reg{ldc.src_reg};
+
+    const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
+    if (!shl_insn) {
+        return std::nullopt;
+    }
+    const Encoding shl{*shl_insn};
+    const IR::Reg shl_reg{shl.src_reg};
+
+    const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
+    if (!imnmx_insn) {
+        return std::nullopt;
+    }
+    const Encoding imnmx{*imnmx_insn};
+    if (imnmx.is_negative != 0) {
+        return std::nullopt;
+    }
+    const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
+    return IndirectBranchTableInfo{
+        .cbuf_index{cbuf_index},
+        .cbuf_offset{cbuf_offset},
+        .num_entries{imnmx_immediate + 1},
+        .branch_offset{brx_offset},
+        .branch_reg{brx_reg},
+    };
+}
+
+} // namespace Shader::Maxwell
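
The tracker walks backwards from BRX and pattern-matches the code shape
compilers emit for jump tables: IMNMX clamps the index, SHL scales it to a
byte offset, and LDC fetches the target from a constant buffer, which is why
num_entries is the clamp immediate plus one (indices 0..clamp inclusive are
reachable). What the matched sequence computes, modeled in C++ (cbuf stands in
for the bound constant buffer):

    #include <cstdint>

    // IMNMX (used as an unsigned min) -> SHL by 2 -> LDC.32 -> BRX target.
    std::uint32_t IndirectTarget(const std::uint32_t* cbuf,
                                 std::uint32_t cbuf_offset, std::uint32_t index,
                                 std::uint32_t clamp) {
        const std::uint32_t clamped = index < clamp ? index : clamp; // IMNMX_imm
        const std::uint32_t byte_offset = clamped << 2;              // SHL_imm
        return cbuf[(cbuf_offset + byte_offset) / 4];                // LDC.32
    }
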
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 0000000000..eee5102fa6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell {
+
+struct IndirectBranchTableInfo {
+    u32 cbuf_index{};
+    u32 cbuf_offset{};
+    u32 num_entries{};
+    s32 branch_offset{};
+    IR::Reg branch_reg{};
+};
+
+std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
+                                                                Location block_begin);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
index 57fd531f2b..743d68d615 100644
--- a/src/shader_recompiler/frontend/maxwell/instruction.h
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -7,6 +7,7 @@
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "shader_recompiler/frontend/ir/flow_test.h"
+#include "shader_recompiler/frontend/ir/reg.h"
 
 namespace Shader::Maxwell {
 
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 9d46883902..a6e55f61ed 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -17,6 +17,7 @@
 #include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
 #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
 #include "shader_recompiler/frontend/maxwell/translate/translate.h"
 #include "shader_recompiler/object_pool.h"
@@ -46,12 +47,15 @@ enum class StatementType {
     Break,
     Return,
     Kill,
+    Unreachable,
     Function,
     Identity,
     Not,
     Or,
     SetVariable,
+    SetIndirectBranchVariable,
     Variable,
+    IndirectBranchCond,
 };
 
 bool HasChildren(StatementType type) {
@@ -72,12 +76,15 @@ struct Loop {};
 struct Break {};
 struct Return {};
 struct Kill {};
+struct Unreachable {};
 struct FunctionTag {};
 struct Identity {};
 struct Not {};
 struct Or {};
 struct SetVariable {};
+struct SetIndirectBranchVariable {};
 struct Variable {};
+struct IndirectBranchCond {};
 
 #ifdef _MSC_VER
 #pragma warning(push)
@@ -96,6 +103,7 @@ struct Statement : ListBaseHook {
         : cond{cond_}, up{up_}, type{StatementType::Break} {}
     Statement(Return) : type{StatementType::Return} {}
     Statement(Kill) : type{StatementType::Kill} {}
+    Statement(Unreachable) : type{StatementType::Unreachable} {}
     Statement(FunctionTag) : children{}, type{StatementType::Function} {}
     Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {}
     Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {}
@@ -103,7 +111,12 @@ struct Statement : ListBaseHook {
         : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {}
     Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
         : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
+    Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_)
+        : branch_offset{branch_offset_},
+          branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {}
     Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {}
+    Statement(IndirectBranchCond, u32 location_)
+        : location{location_}, type{StatementType::IndirectBranchCond} {}
 
     ~Statement() {
         if (HasChildren(type)) {
@@ -118,11 +131,14 @@ struct Statement : ListBaseHook {
         IR::Condition guest_cond;
         Statement* op;
         Statement* op_a;
+        u32 location;
+        s32 branch_offset;
     };
     union {
         Statement* cond;
         Statement* op_b;
         u32 id;
+        IR::Reg branch_reg;
     };
     Statement* up{};
     StatementType type;
@@ -141,6 +157,8 @@ std::string DumpExpr(const Statement* stmt) {
         return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
     case StatementType::Variable:
         return fmt::format("goto_L{}", stmt->id);
+    case StatementType::IndirectBranchCond:
+        return fmt::format("(indirect_branch == {:x})", stmt->location);
     default:
         return "<invalid type>";
     }
@@ -182,14 +200,22 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) {
         case StatementType::Kill:
             ret += fmt::format("{}    kill;\n", indent);
             break;
+        case StatementType::Unreachable:
+            ret += fmt::format("{}    unreachable;\n", indent);
+            break;
         case StatementType::SetVariable:
             ret += fmt::format("{}    goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
             break;
+        case StatementType::SetIndirectBranchVariable:
+            ret += fmt::format("{}    indirect_branch = {} + {};\n", indent, stmt->branch_reg,
+                               stmt->branch_offset);
+            break;
         case StatementType::Function:
         case StatementType::Identity:
         case StatementType::Not:
         case StatementType::Or:
         case StatementType::Variable:
+        case StatementType::IndirectBranchCond:
             throw LogicError("Statement can't be printed");
         }
     }
@@ -417,6 +443,17 @@ private:
                 }
                 break;
             }
+            case Flow::EndClass::IndirectBranch:
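+                // Lower the indirect branch to an address-variable assignment followed by
+                // one conditional goto per possible target; any other address is unreachable.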
+                root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
+                                             block.branch_offset));
+                for (Flow::Block* const branch : block.indirect_branches) {
+                    const Node indirect_label{local_labels.at(branch)};
+                    Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())};
+                    Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
+                    gotos.push_back(root.insert(ip, *goto_stmt));
+                }
+                root.insert(ip, *pool.Create(Unreachable{}));
+                break;
             case Flow::EndClass::Call: {
                 Flow::Function& call{cfg.Functions()[block.function_call]};
                 const Node call_return_label{local_labels.at(block.return_block)};
@@ -623,6 +660,8 @@ IR::Block* TryFindForwardBlock(const Statement& stmt) {
         return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
     case StatementType::Variable:
         return ir.GetGotoVariable(stmt.id);
+    case StatementType::IndirectBranchCond:
+        return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
     default:
         throw NotImplementedException("Statement type {}", stmt.type);
     }
@@ -670,6 +709,15 @@ private:
                 ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
                 break;
             }
+            case StatementType::SetIndirectBranchVariable: {
+                if (!current_block) {
+                    current_block = MergeBlock(parent, stmt);
+                }
+                IR::IREmitter ir{*current_block};
+                IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
+                ir.SetIndirectBranchVariable(address);
+                break;
+            }
             case StatementType::If: {
                 if (!current_block) {
                     current_block = block_pool.Create(inst_pool);
@@ -756,6 +804,15 @@ private:
                 current_block = demote_block;
                 break;
             }
+            case StatementType::Unreachable: {
+                if (!current_block) {
+                    current_block = block_pool.Create(inst_pool);
+                    block_list.push_back(current_block);
+                }
+                IR::IREmitter{*current_block}.Unreachable();
+                current_block = nullptr;
+                break;
+            }
             default:
                 throw NotImplementedException("Statement type {}", stmt.type);
             }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 0000000000..371c0e0f74
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void Check(u64 insn) {
+    union {
+        u64 raw;
+        BitField<5, 1, u64> cbuf_mode;
+        BitField<6, 1, u64> lmt;
+    } const encoding{insn};
+
+    if (encoding.cbuf_mode != 0) {
+        throw NotImplementedException("Constant buffer mode");
+    }
+    if (encoding.lmt != 0) {
+        throw NotImplementedException("LMT");
+    }
+}
+} // Anonymous namespace
+
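+// BRX/JMX only validate the encoding here; their targets are resolved during CFG analysis.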
+void TranslatorVisitor::BRX(u64 insn) {
+    Check(insn);
+}
+
+void TranslatorVisitor::JMX(u64 insn) {
+    Check(insn);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
index 39becf93c2..49ccb7d62a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -5,25 +5,11 @@
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
 
 namespace Shader::Maxwell {
+using namespace LDC;
 namespace {
-enum class Mode : u64 {
-    Default,
-    IL,
-    IS,
-    ISL,
-};
-
-enum class Size : u64 {
-    U8,
-    S8,
-    U16,
-    S16,
-    B32,
-    B64,
-};
-
 std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
                                  const IR::U32& reg, const IR::U32& imm) {
     switch (mode) {
@@ -37,16 +23,7 @@ std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& im
 } // Anonymous namespace
 
 void TranslatorVisitor::LDC(u64 insn) {
-    union {
-        u64 raw;
-        BitField<0, 8, IR::Reg> dest_reg;
-        BitField<8, 8, IR::Reg> src_reg;
-        BitField<20, 16, s64> offset;
-        BitField<36, 5, u64> index;
-        BitField<44, 2, Mode> mode;
-        BitField<48, 3, Size> size;
-    } const ldc{insn};
-
+    const Encoding ldc{insn};
     const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
     const IR::U32 reg{X(ldc.src_reg)};
     const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 0000000000..3074ea0e3d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+
+namespace Shader::Maxwell::LDC {
+
+enum class Mode : u64 {
+    Default,
+    IL,
+    IS,
+    ISL,
+};
+
+enum class Size : u64 {
+    U8,
+    S8,
+    U16,
+    S16,
+    B32,
+    B64,
+};
+
+union Encoding {
+    u64 raw;
+    BitField<0, 8, IR::Reg> dest_reg;
+    BitField<8, 8, IR::Reg> src_reg;
+    BitField<20, 16, s64> offset;
+    BitField<36, 5, u64> index;
+    BitField<44, 2, Mode> mode;
+    BitField<48, 3, Size> size;
+};
+
+} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index b62d8ee2aa..a0057a4739 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -53,10 +53,6 @@ void TranslatorVisitor::BRK(u64) {
     ThrowNotImplemented(Opcode::BRK);
 }
 
-void TranslatorVisitor::BRX(u64) {
-    ThrowNotImplemented(Opcode::BRX);
-}
-
 void TranslatorVisitor::CAL() {
     // CAL is a no-op
 }
@@ -181,10 +177,6 @@ void TranslatorVisitor::JMP(u64) {
     ThrowNotImplemented(Opcode::JMP);
 }
 
-void TranslatorVisitor::JMX(u64) {
-    ThrowNotImplemented(Opcode::JMX);
-}
-
 void TranslatorVisitor::KIL() {
     // KIL is a no-op
 }
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index bab7ca1868..2592337461 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -48,8 +48,12 @@ struct GotoVariable : FlagTag {
     u32 index;
 };
 
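+// Tag for the per-function variable holding the current indirect branch target address.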
+struct IndirectBranchVariable {
+    auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
+};
+
 using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
-                             OverflowFlagTag, GotoVariable>;
+                             OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
 using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>;
 
 struct DefTable {
@@ -65,6 +69,10 @@ struct DefTable {
         return goto_vars[goto_variable.index];
     }
 
+    [[nodiscard]] ValueMap& operator[](IndirectBranchVariable) {
+        return indirect_branch_var;
+    }
+
     [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept {
         return zero_flag;
     }
@@ -84,6 +92,7 @@ struct DefTable {
     std::array<ValueMap, IR::NUM_USER_REGS> regs;
     std::array<ValueMap, IR::NUM_USER_PREDS> preds;
     boost::container::flat_map<u32, ValueMap> goto_vars;
+    ValueMap indirect_branch_var;
     ValueMap zero_flag;
     ValueMap sign_flag;
     ValueMap carry_flag;
@@ -102,6 +111,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept {
     return IR::Opcode::UndefU1;
 }
 
+IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
 [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
     return inst.Opcode() == IR::Opcode::Phi;
 }
@@ -219,6 +232,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
     case IR::Opcode::SetGotoVariable:
         pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
         break;
+    case IR::Opcode::SetIndirectBranchVariable:
+        pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
+        break;
     case IR::Opcode::SetZFlag:
         pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
         break;
@@ -244,6 +260,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
     case IR::Opcode::GetGotoVariable:
         inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
         break;
+    case IR::Opcode::GetIndirectBranchVariable:
+        inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
+        break;
     case IR::Opcode::GetZFlag:
         inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
         break;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8b2816c131..6cde014912 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -47,7 +47,7 @@ auto MakeSpan(Container& container) {
 }
 
 u64 MakeCbufKey(u32 index, u32 offset) {
-    return (static_cast<u64>(index) << 32) | static_cast<u64>(offset);
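+    // Pack the cbuf index into the high 32 bits and the byte offset into the low 32 bits.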
+    return (static_cast<u64>(index) << 32) | offset;
 }
 
 class GenericEnvironment : public Shader::Environment {
@@ -114,11 +114,13 @@ public:
         gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size);
 
         const u64 num_texture_types{static_cast<u64>(texture_types.size())};
+        const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
         const u32 local_memory_size{LocalMemorySize()};
         const u32 texture_bound{TextureBoundBuffer()};
 
         file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
             .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
+            .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
             .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
             .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
             .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
@@ -130,6 +132,10 @@ public:
             file.write(reinterpret_cast<const char*>(&key), sizeof(key))
                 .write(reinterpret_cast<const char*>(&type), sizeof(type));
         }
+        for (const auto [key, value] : cbuf_values) {
+            file.write(reinterpret_cast<const char*>(&key), sizeof(key))
+                .write(reinterpret_cast<const char*>(&value), sizeof(value));
+        }
         if (stage == Shader::Stage::Compute) {
             const std::array<u32, 3> workgroup_size{WorkgroupSize()};
             const u32 shared_memory_size{SharedMemorySize()};
@@ -212,6 +218,7 @@ protected:
 
     std::vector<u64> code;
     std::unordered_map<u64, Shader::TextureType> texture_types;
+    std::unordered_map<u64, u32> cbuf_values;
 
     u32 read_lowest = std::numeric_limits<u32>::max();
     u32 read_highest = 0;
@@ -267,6 +274,17 @@ public:
 
     ~GraphicsEnvironment() override = default;
 
+    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
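+        // Read the value from the guest constant buffer and record it for serialization.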
+        const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
+        ASSERT(cbuf.enabled);
+        u32 value{};
+        if (cbuf_offset < cbuf.size) {
+            value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
+        }
+        cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
+        return value;
+    }
+
     Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override {
         const auto& regs{maxwell3d->regs};
         const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
@@ -312,6 +330,18 @@ public:
 
     ~ComputeEnvironment() override = default;
 
+    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
+        const auto& qmd{kepler_compute->launch_description};
+        ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
+        const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
+        u32 value{};
+        if (cbuf_offset < cbuf.size) {
+            value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
+        }
+        cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
+        return value;
+    }
+
     Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override {
         const auto& regs{kepler_compute->regs};
         const auto& qmd{kepler_compute->launch_description};
@@ -386,8 +416,10 @@ public:
     void Deserialize(std::ifstream& file) {
         u64 code_size{};
         u64 num_texture_types{};
+        u64 num_cbuf_values{};
         file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
             .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
+            .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
             .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
             .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
             .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
@@ -403,6 +435,13 @@ public:
                 .read(reinterpret_cast<char*>(&type), sizeof(type));
             texture_types.emplace(key, type);
         }
+        for (size_t i = 0; i < num_cbuf_values; ++i) {
+            u64 key;
+            u32 value;
+            file.read(reinterpret_cast<char*>(&key), sizeof(key))
+                .read(reinterpret_cast<char*>(&value), sizeof(value));
+            cbuf_values.emplace(key, value);
+        }
         if (stage == Shader::Stage::Compute) {
             file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
                 .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
@@ -418,6 +457,14 @@ public:
         return code[(address - read_lowest) / sizeof(u64)];
     }
 
+    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
+        const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
+        if (it == cbuf_values.end()) {
+            throw Shader::LogicError("Uncached read texture type");
+        }
+        return it->second;
+    }
+
     Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override {
         const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))};
         if (it == texture_types.end()) {
@@ -445,6 +492,7 @@ public:
 private:
     std::unique_ptr<u64[]> code;
     std::unordered_map<u64, Shader::TextureType> texture_types;
+    std::unordered_map<u64, u32> cbuf_values;
     std::array<u32, 3> workgroup_size{};
     u32 local_memory_size{};
     u32 shared_memory_size{};
-- 
cgit v1.2.3-70-g09d2


From 514a6b07eedace58b4a0c95282bdfc729623d1d9 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 30 Mar 2021 03:19:50 -0300
Subject: shader: Store type of phi nodes in flags

This is needed because pseudo-instructions were invalidated.
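
A short sketch (names as used in this patch): the type is now read back
from the instruction's flags instead of being derived from Arg(0):

    // Recorded when the first phi operand is added, read at emission time.
    const IR::Type type{inst->Flags<IR::Type>()};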
---
 src/shader_recompiler/backend/spirv/emit_spirv.cpp     | 3 ++-
 src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ++++
 src/shader_recompiler/frontend/ir/value.cpp            | 6 +++++-
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 2e7e6bb0c9..6389d80bfb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -288,7 +288,8 @@ Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
         operands.push_back(PhiArgDef(ctx, inst, index));
         operands.push_back(inst->PhiBlock(index)->Definition<Id>());
     }
-    const Id result_type{TypeId(ctx, inst->Arg(0).Type())};
+    // The type of a phi instruction is stored in its flags
+    const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())};
     return ctx.OpPhi(result_type, std::span(operands.data(), operands.size()));
 }
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index c3ba6b5222..074c71d533 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -193,6 +193,10 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
     if (!value.IsImmediate()) {
         Use(value);
     }
+    if (Flags<IR::Type>() == IR::Type::Void) {
+        // Set the type of the phi node
+        SetFlags<IR::Type>(value.Type());
+    }
     phi_args.emplace_back(predecessor, value);
 }
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index e8e4662e7b..837c1b487f 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -56,7 +56,11 @@ bool Value::IsLabel() const noexcept {
 }
 
 IR::Type Value::Type() const noexcept {
-    if (IsIdentity() || IsPhi()) {
+    if (IsPhi()) {
+        // The type of a phi node is stored in its flags
+        return inst->Flags<IR::Type>();
+    }
+    if (IsIdentity()) {
         return inst->Arg(0).Type();
     }
     if (type == Type::Opaque) {
-- 
cgit v1.2.3-70-g09d2


From fc93bc2abde0b54a0a495f9b28a76fd34b47f320 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 4 Apr 2021 03:04:48 -0300
Subject: shader: Implement BAR and fix memory barriers

---
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  1 +
 .../backend/spirv/emit_spirv_barriers.cpp          | 15 ++++--
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  4 ++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  1 +
 .../frontend/ir/microinstruction.cpp               |  4 ++
 src/shader_recompiler/frontend/ir/opcodes.inc      |  1 +
 .../maxwell/translate/impl/barrier_operations.cpp  | 58 +++++++++++++++++++++-
 7 files changed, 79 insertions(+), 5 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index e066ba87d5..032b0b2f9d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -28,6 +28,7 @@ void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
 void EmitReturn(EmitContext& ctx);
 void EmitUnreachable(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
+void EmitBarrier(EmitContext& ctx);
 void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx);
 void EmitMemoryBarrierDeviceLevel(EmitContext& ctx);
 void EmitMemoryBarrierSystemLevel(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
index 18f512319b..74f523d0ff 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
@@ -8,16 +8,25 @@
 namespace Shader::Backend::SPIRV {
 namespace {
 void EmitMemoryBarrierImpl(EmitContext& ctx, spv::Scope scope) {
-    const auto semantics =
+    const auto semantics{
         spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
         spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
-        spv::MemorySemanticsMask::ImageMemory;
+        spv::MemorySemanticsMask::ImageMemory};
     ctx.OpMemoryBarrier(ctx.Constant(ctx.U32[1], static_cast<u32>(scope)),
                         ctx.Constant(ctx.U32[1], static_cast<u32>(semantics)));
 }
-
 } // Anonymous namespace
 
+void EmitBarrier(EmitContext& ctx) {
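+    // BAR.SYNC lowers to a workgroup control barrier with acquire/release
+    // semantics over workgroup memory.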
+    const auto execution{spv::Scope::Workgroup};
+    const auto memory{spv::Scope::Workgroup};
+    const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
+                                spv::MemorySemanticsMask::WorkgroupMemory};
+    ctx.OpControlBarrier(ctx.Constant(ctx.U32[1], static_cast<u32>(execution)),
+                         ctx.Constant(ctx.U32[1], static_cast<u32>(memory)),
+                         ctx.Constant(ctx.U32[1], static_cast<u32>(memory_semantics)));
+}
+
 void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx) {
     EmitMemoryBarrierImpl(ctx, spv::Scope::Workgroup);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index dbd38a28b3..246c3b9ef1 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -82,6 +82,10 @@ void IREmitter::SelectionMerge(Block* merge_block) {
     Inst(Opcode::SelectionMerge, merge_block);
 }
 
+void IREmitter::Barrier() {
+    Inst(Opcode::Barrier);
+}
+
 void IREmitter::MemoryBarrier(MemoryScope scope) {
     switch (scope) {
     case MemoryScope::Workgroup:
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 81a57fefea..1b00c548d0 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -128,6 +128,7 @@ public:
     [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
                                const Value& false_value);
 
+    void Barrier();
     [[nodiscard]] void MemoryBarrier(MemoryScope scope);
 
     template <typename Dest, typename Source>
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 074c71d533..481202d94b 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -57,6 +57,10 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::Return:
     case Opcode::Unreachable:
     case Opcode::DemoteToHelperInvocation:
+    case Opcode::Barrier:
+    case Opcode::MemoryBarrierWorkgroupLevel:
+    case Opcode::MemoryBarrierDeviceLevel:
+    case Opcode::MemoryBarrierSystemLevel:
     case Opcode::Prologue:
     case Opcode::Epilogue:
     case Opcode::SetAttribute:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 734f5328be..dcd54bcf79 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -17,6 +17,7 @@ OPCODE(Unreachable,                                         Void,
 OPCODE(DemoteToHelperInvocation,                            Void,           Label,                                                                          )
 
 // Barriers
+OPCODE(Barrier,                                             Void,                                                                                           )
 OPCODE(MemoryBarrierWorkgroupLevel,                         Void,                                                                                           )
 OPCODE(MemoryBarrierDeviceLevel,                            Void,                                                                                           )
 OPCODE(MemoryBarrierSystemLevel,                            Void,                                                                                           )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
index 26d5e276bc..2a2a294dfa 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -38,6 +38,7 @@ void TranslatorVisitor::MEMBAR(u64 inst) {
         u64 raw;
         BitField<8, 2, LocalScope> scope;
     } membar{inst};
+
     ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope));
 }
 
@@ -45,8 +46,61 @@ void TranslatorVisitor::DEPBAR() {
     // DEPBAR is a no-op
 }
 
-void TranslatorVisitor::BAR(u64) {
-    throw NotImplementedException("Instruction {} is not implemented", Opcode::BAR);
+void TranslatorVisitor::BAR(u64 insn) {
+    enum class Mode {
+        RedPopc,
+        Scan,
+        RedAnd,
+        RedOr,
+        Sync,
+        Arrive,
+    };
+    union {
+        u64 raw;
+        BitField<43, 1, u64> is_a_imm;
+        BitField<44, 1, u64> is_b_imm;
+        BitField<8, 8, u64> imm_a;
+        BitField<20, 12, u64> imm_b;
+        BitField<42, 1, u64> neg_pred;
+        BitField<39, 3, IR::Pred> pred;
+    } const bar{insn};
+
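+    // The mode is encoded in scattered opcode bits; mask them and match known patterns.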
+    const Mode mode{[insn] {
+        switch (insn & 0x0000009B00000000ULL) {
+        case 0x0000000200000000ULL:
+            return Mode::RedPopc;
+        case 0x0000000300000000ULL:
+            return Mode::Scan;
+        case 0x0000000A00000000ULL:
+            return Mode::RedAnd;
+        case 0x0000001200000000ULL:
+            return Mode::RedOr;
+        case 0x0000008000000000ULL:
+            return Mode::Sync;
+        case 0x0000008100000000ULL:
+            return Mode::Arrive;
+        }
+        throw NotImplementedException("Invalid encoding");
+    }()};
+    if (mode != Mode::Sync) {
+        throw NotImplementedException("BAR mode {}", mode);
+    }
+    if (bar.is_a_imm == 0) {
+        throw NotImplementedException("Non-immediate input A");
+    }
+    if (bar.imm_a != 0) {
+        throw NotImplementedException("Non-zero input A");
+    }
+    if (bar.is_b_imm == 0) {
+        throw NotImplementedException("Non-immediate input B");
+    }
+    if (bar.imm_b != 0) {
+        throw NotImplementedException("Non-zero input B");
+    }
+    if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
+        throw NotImplementedException("Non-true input predicate");
+    }
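+    // Only a plain BAR.SYNC on barrier 0 with no inputs or predicate reaches this point.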
+    ir.Barrier();
 }
 
 } // namespace Shader::Maxwell
-- 
cgit v1.2.3-70-g09d2


From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001
From: lat9nq <22451773+lat9nq@users.noreply.github.com>
Date: Mon, 5 Apr 2021 22:25:22 -0400
Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors

Mostly fixes unused-* warnings, implicit conversions, braced scalar
initializers, -fpermissive violations, and some others.

Some Clang errors likely remain in video_core, and std::ranges is still
an open issue in shader_recompiler.

shader_recompiler: cmake: Force bracket depth to 1024 on Clang
Increases the maximum fold expression depth

thread_worker: Include condition_variable

Don't use list initializers in control flow
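
A sketch of the pattern being replaced (from control_flow.cpp):

    // before: braced designated-initializer lists
    *old_block = Block{
        .begin{old_block->begin},
        .end{pc},
        // ...
    };
    // after: default-construct, then assign the members that change
    *old_block = Block{};
    old_block->end = pc;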

Co-authored-by: ReinUsesLisp <reinuseslisp@airmail.cc>
---
 src/common/thread_worker.h                         |   1 +
 src/shader_recompiler/CMakeLists.txt               |   2 +
 .../backend/spirv/emit_context.cpp                 |   4 +-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  19 +--
 .../backend/spirv/emit_spirv_image.cpp             |  11 +-
 .../backend/spirv/emit_spirv_warp.cpp              |   2 +-
 src/shader_recompiler/file_environment.h           |   2 +-
 src/shader_recompiler/frontend/ir/attribute.cpp    |   4 +-
 src/shader_recompiler/frontend/ir/basic_block.cpp  |   2 +-
 src/shader_recompiler/frontend/ir/condition.cpp    |   6 +-
 src/shader_recompiler/frontend/ir/condition.h      |   4 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |   4 +-
 .../frontend/ir/microinstruction.cpp               |  16 +--
 .../frontend/ir/microinstruction.h                 |   4 +-
 src/shader_recompiler/frontend/ir/opcodes.cpp      |   2 +-
 src/shader_recompiler/frontend/ir/program.cpp      |   2 -
 src/shader_recompiler/frontend/ir/value.cpp        |   4 +-
 src/shader_recompiler/frontend/ir/value.h          |   2 +-
 .../frontend/maxwell/control_flow.cpp              | 140 +++++++++------------
 src/shader_recompiler/frontend/maxwell/decode.cpp  |  10 +-
 .../maxwell/indirect_branch_table_track.cpp        |  10 +-
 .../frontend/maxwell/structured_control_flow.cpp   |   3 +-
 .../frontend/maxwell/translate/impl/double_add.cpp |   6 +-
 .../translate/impl/double_fused_multiply_add.cpp   |   6 +-
 .../maxwell/translate/impl/double_multiply.cpp     |   6 +-
 .../maxwell/translate/impl/floating_point_add.cpp  |   6 +-
 .../translate/impl/floating_point_compare.cpp      |   3 +-
 .../impl/floating_point_compare_and_set.cpp        |   6 +-
 .../floating_point_conversion_floating_point.cpp   |   6 +-
 .../impl/floating_point_conversion_integer.cpp     |  11 +-
 .../impl/floating_point_fused_multiply_add.cpp     |   6 +-
 .../translate/impl/floating_point_min_max.cpp      |   6 +-
 .../translate/impl/floating_point_multiply.cpp     |   8 +-
 .../impl/floating_point_set_predicate.cpp          |   6 +-
 .../translate/impl/floating_point_swizzled_add.cpp |   6 +-
 .../translate/impl/half_floating_point_add.cpp     |  11 +-
 .../half_floating_point_fused_multiply_add.cpp     |  11 +-
 .../impl/half_floating_point_multiply.cpp          |  11 +-
 .../translate/impl/half_floating_point_set.cpp     |  11 +-
 .../impl/half_floating_point_set_predicate.cpp     |  12 +-
 .../frontend/maxwell/translate/impl/impl.cpp       |   8 +-
 .../maxwell/translate/impl/integer_add.cpp         |   1 -
 .../impl/integer_floating_point_conversion.cpp     |   4 +-
 .../maxwell/translate/impl/load_constant.cpp       |   2 +-
 .../translate/impl/load_store_local_shared.cpp     |   9 +-
 .../maxwell/translate/impl/load_store_memory.cpp   |   4 +-
 .../maxwell/translate/impl/texture_fetch.cpp       |   2 +-
 .../translate/impl/texture_fetch_swizzled.cpp      |   2 +-
 .../translate/impl/texture_gather_swizzled.cpp     |   2 +-
 .../translate/impl/texture_load_swizzled.cpp       |   2 +-
 .../maxwell/translate/impl/texture_query.cpp       |   2 +-
 .../maxwell/translate/impl/video_set_predicate.cpp |   1 -
 .../ir_opt/collect_shader_info_pass.cpp            |  20 +--
 .../ir_opt/constant_propagation_pass.cpp           |  49 ++++----
 .../global_memory_to_storage_buffer_pass.cpp       |  42 +++----
 .../ir_opt/identity_removal_pass.cpp               |   3 +-
 .../ir_opt/lower_fp16_to_fp32.cpp                  |   2 +-
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |   4 +-
 src/shader_recompiler/ir_opt/texture_pass.cpp      |  32 ++---
 src/shader_recompiler/ir_opt/verification_pass.cpp |   4 +-
 src/tests/common/unique_function.cpp               |   2 +
 src/video_core/CMakeLists.txt                      |   2 +-
 .../renderer_vulkan/vk_graphics_pipeline.cpp       |  21 ++--
 .../renderer_vulkan/vk_pipeline_cache.cpp          |   5 +-
 .../renderer_vulkan/vk_render_pass_cache.cpp       |   2 -
 .../renderer_vulkan/vk_texture_cache.cpp           |   2 +-
 66 files changed, 308 insertions(+), 313 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h
index 0a975a869d..cd0017726f 100644
--- a/src/common/thread_worker.h
+++ b/src/common/thread_worker.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <atomic>
+#include <condition_variable>
 #include <functional>
 #include <mutex>
 #include <stop_token>
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 22639fe132..551bf1c582 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -196,6 +196,8 @@ else()
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
         -Werror=unused-variable
+
+        $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024>
     )
 endif()
 
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index b738e00cc2..0c114402b4 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 #include <array>
+#include <climits>
 #include <string_view>
 
 #include <fmt/format.h>
@@ -116,7 +117,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
         const std::string_view def_name_view(
             def_name.data(),
             fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
-        defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
+        defs[static_cast<size_t>(i)] =
+            sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
     }
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 32512a0e5f..355cf0ca8a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -16,7 +16,7 @@
 namespace Shader::Backend::SPIRV {
 namespace {
 template <class Func>
-struct FuncTraits : FuncTraits<Func> {};
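+// Leave the primary template empty; it is specialized below for function pointers.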
+struct FuncTraits {};
 
 template <class ReturnType_, class... Args>
 struct FuncTraits<ReturnType_ (*)(Args...)> {
@@ -64,17 +64,20 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
 template <auto func, bool is_first_arg_inst, size_t... I>
 void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
     using Traits = FuncTraits<decltype(func)>;
-    if constexpr (std::is_same_v<Traits::ReturnType, Id>) {
+    if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
         if constexpr (is_first_arg_inst) {
-            SetDefinition<func>(ctx, inst, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
+            SetDefinition<func>(
+                ctx, inst, inst,
+                Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
         } else {
-            SetDefinition<func>(ctx, inst, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
+            SetDefinition<func>(
+                ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
         }
     } else {
         if constexpr (is_first_arg_inst) {
-            func(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
+            func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
         } else {
-            func(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
+            func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
         }
     }
 }
@@ -94,14 +97,14 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitInst(EmitContext& ctx, IR::Inst* inst) {
-    switch (inst->Opcode()) {
+    switch (inst->GetOpcode()) {
 #define OPCODE(name, result_type, ...)                                                             \
     case IR::Opcode::name:                                                                         \
         return Invoke<&Emit##name>(ctx, inst);
 #include "shader_recompiler/frontend/ir/opcodes.inc"
 #undef OPCODE
     }
-    throw LogicError("Invalid opcode {}", inst->Opcode());
+    throw LogicError("Invalid opcode {}", inst->GetOpcode());
 }
 
 Id TypeId(const EmitContext& ctx, IR::Type type) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index f0f8db8c37..815ca62992 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -43,11 +43,13 @@ public:
             // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING");
             return;
         }
-        const IR::Opcode opcode{values[0]->Opcode()};
-        if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
+        const IR::Opcode opcode{values[0]->GetOpcode()};
+        if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
             throw LogicError("Invalid PTP arguments");
         }
-        auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }};
+        auto read{[&](unsigned int a, unsigned int b) {
+            return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32());
+        }};
 
         const Id offsets{
             ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)),
@@ -297,13 +299,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
 
 Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
                        const IR::Value& offset, const IR::Value& offset2, Id dref) {
-    const auto info{inst->Flags<IR::TextureInstInfo>()};
     const ImageOperands operands(ctx, offset, offset2);
     return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
                 ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
 }
 
+#ifdef _WIN32
 #pragma optimize("", off)
+#endif
 
 Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
                   Id lod, Id ms) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index c57bd291db..12a03ed6ed 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -7,7 +7,7 @@
 namespace Shader::Backend::SPIRV {
 namespace {
 Id WarpExtract(EmitContext& ctx, Id value) {
-    const Id shift{ctx.Constant(ctx.U32[1], 5)};
+    [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)};
     const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
     return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
 }
diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h
index 17640a6229..71601f8fd6 100644
--- a/src/shader_recompiler/file_environment.h
+++ b/src/shader_recompiler/file_environment.h
@@ -7,7 +7,7 @@
 
 namespace Shader {
 
-class FileEnvironment final : public Environment {
+class FileEnvironment : public Environment {
 public:
     explicit FileEnvironment(const char* path);
     ~FileEnvironment() override;
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
index 4811242ea0..7993e5c436 100644
--- a/src/shader_recompiler/frontend/ir/attribute.cpp
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) {
     if (!IsGeneric(attribute)) {
         throw InvalidArgument("Attribute is not generic {}", attribute);
     }
-    return (static_cast<int>(attribute) - static_cast<int>(Attribute::Generic0X)) / 4;
+    return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
 }
 
 std::string NameOf(Attribute attribute) {
@@ -444,4 +444,4 @@ std::string NameOf(Attribute attribute) {
     return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
 }
 
-} // namespace Shader::IR
\ No newline at end of file
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index ec029dfd6e..e1f0191f40 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
     ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd());
 
     for (const Inst& inst : block) {
-        const Opcode op{inst.Opcode()};
+        const Opcode op{inst.GetOpcode()};
         ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
         if (TypeOf(op) != Type::Void) {
             ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
index ec1659e2bc..fc18ea2a2f 100644
--- a/src/shader_recompiler/frontend/ir/condition.cpp
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -12,10 +12,10 @@ namespace Shader::IR {
 
 std::string NameOf(Condition condition) {
     std::string ret;
-    if (condition.FlowTest() != FlowTest::T) {
-        ret = fmt::to_string(condition.FlowTest());
+    if (condition.GetFlowTest() != FlowTest::T) {
+        ret = fmt::to_string(condition.GetFlowTest());
     }
-    const auto [pred, negated]{condition.Pred()};
+    const auto [pred, negated]{condition.GetPred()};
     if (!ret.empty()) {
         ret += '&';
     }
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
index 51c2f15cf5..aa8597c608 100644
--- a/src/shader_recompiler/frontend/ir/condition.h
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -30,11 +30,11 @@ public:
 
     auto operator<=>(const Condition&) const noexcept = default;
 
-    [[nodiscard]] IR::FlowTest FlowTest() const noexcept {
+    [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
         return static_cast<IR::FlowTest>(flow_test);
     }
 
-    [[nodiscard]] std::pair<IR::Pred, bool> Pred() const noexcept {
+    [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
         return {static_cast<IR::Pred>(pred), pred_negated != 0};
     }
 
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 13eb2de4c4..a2104bdb31 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
 }
 
 U1 IREmitter::Condition(IR::Condition cond) {
-    const FlowTest flow_test{cond.FlowTest()};
-    const auto [pred, is_negated]{cond.Pred()};
+    const FlowTest flow_test{cond.GetFlowTest()};
+    const auto [pred, is_negated]{cond.GetPred()};
     return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
 }
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 481202d94b..ceb44e6042 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -12,7 +12,7 @@
 namespace Shader::IR {
 namespace {
 void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
-    if (inst && inst->Opcode() != opcode) {
+    if (inst && inst->GetOpcode() != opcode) {
         throw LogicError("Invalid pseudo-instruction");
     }
 }
@@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
 }
 
 void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
-    if (inst->Opcode() != expected_opcode) {
+    if (inst->GetOpcode() != expected_opcode) {
         throw LogicError("Undoing use of invalid pseudo-op");
     }
     inst = nullptr;
 }
+
+void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
+    if (!associated_insts) {
+        associated_insts = std::make_unique<AssociatedInsts>();
+    }
+}
 } // Anonymous namespace
 
 Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
@@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) {
     op = opcode;
 }
 
-void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
-    if (!associated_insts) {
-        associated_insts = std::make_unique<AssociatedInsts>();
-    }
-}
-
 void Inst::Use(const Value& value) {
     Inst* const inst{value.Inst()};
     ++inst->use_count;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 6658dc674e..97dc91d855 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -46,7 +46,7 @@ public:
     }
 
     /// Get the opcode this microinstruction represents.
-    [[nodiscard]] IR::Opcode Opcode() const noexcept {
+    [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
         return op;
     }
 
@@ -95,7 +95,7 @@ public:
     requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
         [[nodiscard]] FlagsType Flags() const noexcept {
         FlagsType ret;
-        std::memcpy(&ret, &flags, sizeof(ret));
+        std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
         return ret;
     }
 
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
index 1cb9db6c9c..002dbf94e9 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.cpp
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -49,7 +49,7 @@ constexpr std::array META_TABLE{
 #define OPCODE(name_token, type_token, ...)                                                        \
     OpcodeMeta{                                                                                    \
         .name{#name_token},                                                                        \
-        .type{type_token},                                                                         \
+        .type = type_token,                                                                         \
         .arg_types{__VA_ARGS__},                                                                   \
     },
 #include "opcodes.inc"
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
index 5f51aeb5f3..89a17fb1b4 100644
--- a/src/shader_recompiler/frontend/ir/program.cpp
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#pragma once
-
 #include <map>
 #include <string>
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 837c1b487f..1e7ffb86d5 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
 Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
 
 bool Value::IsIdentity() const noexcept {
-    return type == Type::Opaque && inst->Opcode() == Opcode::Identity;
+    return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
 }
 
 bool Value::IsPhi() const noexcept {
-    return type == Type::Opaque && inst->Opcode() == Opcode::Phi;
+    return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
 }
 
 bool Value::IsEmpty() const noexcept {
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index b27601e704..a0962863d8 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -94,7 +94,7 @@ public:
         }
     }
 
-    explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {}
+    explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
 };
 
 using U1 = TypedValue<Type::U1>;
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index 847bb19864..cb8ec7eaa3 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -34,41 +34,37 @@ struct Compare {
 };
 
 u32 BranchOffset(Location pc, Instruction inst) {
-    return pc.Offset() + inst.branch.Offset() + 8;
+    return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
 }
 
 void Split(Block* old_block, Block* new_block, Location pc) {
     if (pc <= old_block->begin || pc >= old_block->end) {
         throw InvalidArgument("Invalid address to split={}", pc);
     }
-    *new_block = Block{
-        .begin{pc},
-        .end{old_block->end},
-        .end_class{old_block->end_class},
-        .cond{old_block->cond},
-        .stack{old_block->stack},
-        .branch_true{old_block->branch_true},
-        .branch_false{old_block->branch_false},
-        .function_call{old_block->function_call},
-        .return_block{old_block->return_block},
-        .branch_reg{old_block->branch_reg},
-        .branch_offset{old_block->branch_offset},
-        .indirect_branches{std::move(old_block->indirect_branches)},
-    };
-    *old_block = Block{
-        .begin{old_block->begin},
-        .end{pc},
-        .end_class{EndClass::Branch},
-        .cond{true},
-        .stack{std::move(old_block->stack)},
-        .branch_true{new_block},
-        .branch_false{nullptr},
-        .function_call{},
-        .return_block{},
-        .branch_reg{},
-        .branch_offset{},
-        .indirect_branches{},
-    };
+    *new_block = Block{};
+    new_block->begin = pc;
+    new_block->end = old_block->end;
+    new_block->end_class = old_block->end_class;
+    new_block->cond = old_block->cond;
+    new_block->stack = old_block->stack;
+    new_block->branch_true = old_block->branch_true;
+    new_block->branch_false = old_block->branch_false;
+    new_block->function_call = old_block->function_call;
+    new_block->return_block = old_block->return_block;
+    new_block->branch_reg = old_block->branch_reg;
+    new_block->branch_offset = old_block->branch_offset;
+    new_block->indirect_branches = std::move(old_block->indirect_branches);
+
+    const Location old_begin{old_block->begin};
+    Stack old_stack{std::move(old_block->stack)};
+    *old_block = Block{};
+    old_block->begin = old_begin;
+    old_block->end = pc;
+    old_block->end_class = EndClass::Branch;
+    old_block->cond = IR::Condition(true);
+    old_block->stack = std::move(old_stack);
+    old_block->branch_true = new_block;
+    old_block->branch_false = nullptr;
 }
 
 Token OpcodeToken(Opcode opcode) {
@@ -141,7 +137,7 @@ std::string NameOf(const Block& block) {
 
 void Stack::Push(Token token, Location target) {
     entries.push_back({
-        .token{token},
+        .token = token,
         .target{target},
     });
 }
@@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept {
 }
 
 Function::Function(ObjectPool<Block>& block_pool, Location start_address)
-    : entrypoint{start_address}, labels{{
-                                     .address{start_address},
-                                     .block{block_pool.Create(Block{
-                                         .begin{start_address},
-                                         .end{start_address},
-                                         .end_class{EndClass::Branch},
-                                         .cond{true},
-                                         .stack{},
-                                         .branch_true{nullptr},
-                                         .branch_false{nullptr},
-                                         .function_call{},
-                                         .return_block{},
-                                         .branch_reg{},
-                                         .branch_offset{},
-                                         .indirect_branches{},
-                                     })},
-                                     .stack{},
-                                 }} {}
+    : entrypoint{start_address} {
+    Label& label{labels.emplace_back()};
+    label.address = start_address;
+    label.block = block_pool.Create(Block{});
+    label.block->begin = start_address;
+    label.block->end = start_address;
+    label.block->end_class = EndClass::Branch;
+    label.block->cond = IR::Condition(true);
+    label.block->branch_true = nullptr;
+    label.block->branch_false = nullptr;
+}
 
 CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address)
     : env{env_}, block_pool{block_pool_}, program_start{start_address} {
@@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         // Insert the function into the list if it doesn't exist
         const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
         const bool exists{it != functions.end()};
-        const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()};
+        const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
+                                        : functions.size()};
         if (!exists) {
             functions.emplace_back(block_pool, cal_pc);
         }
@@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
     }
     // Create a virtual block and a conditional block
     Block* const conditional_block{block_pool.Create()};
-    Block virtual_block{
-        .begin{block->begin.Virtual()},
-        .end{block->begin.Virtual()},
-        .end_class{EndClass::Branch},
-        .cond{cond},
-        .stack{block->stack},
-        .branch_true{conditional_block},
-        .branch_false{nullptr},
-        .function_call{},
-        .return_block{},
-        .branch_reg{},
-        .branch_offset{},
-        .indirect_branches{},
-    };
+    Block virtual_block{};
+    virtual_block.begin = block->begin.Virtual();
+    virtual_block.end = block->begin.Virtual();
+    virtual_block.end_class = EndClass::Branch;
+    virtual_block.stack = block->stack;
+    virtual_block.cond = cond;
+    virtual_block.branch_true = conditional_block;
+    virtual_block.branch_false = nullptr;
     // Save the contents of the visited block in the conditional block
     *conditional_block = std::move(*block);
     // Impersonate the visited block with a virtual block
@@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst,
         if (!is_absolute) {
             target += pc.Offset();
         }
-        target += brx_table->branch_offset;
+        target += static_cast<unsigned int>(brx_table->branch_offset);
         target += 8;
         targets.push_back(target);
     }
@@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst,
     for (const u32 target : targets) {
         Block* const branch{AddLabel(block, block->stack, target, function_id)};
         block->indirect_branches.push_back({
-            .block{branch},
-            .address{target},
+            .block = branch,
+            .address = target,
         });
     }
     block->cond = IR::Condition{true};
@@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function
     if (label_it != function.labels.end()) {
         return label_it->block;
     }
-    Block* const new_block{block_pool.Create(Block{
-        .begin{pc},
-        .end{pc},
-        .end_class{EndClass::Branch},
-        .cond{true},
-        .stack{stack},
-        .branch_true{nullptr},
-        .branch_false{nullptr},
-        .function_call{},
-        .return_block{},
-        .branch_reg{},
-        .branch_offset{},
-        .indirect_branches{},
-    })};
+    Block* const new_block{block_pool.Create()};
+    new_block->begin = pc;
+    new_block->end = pc;
+    new_block->end_class = EndClass::Branch;
+    new_block->cond = IR::Condition(true);
+    new_block->stack = stack;
+    new_block->branch_true = nullptr;
+    new_block->branch_false = nullptr;
     function.labels.push_back(Label{
         .address{pc},
-        .block{new_block},
+        .block = new_block,
         .stack{std::move(stack)},
     });
     return new_block;
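
The AnalyzeBRX hunk above makes the signed-to-unsigned conversion explicit: branch_offset is a
signed bias read from the indirect-branch table, while target accumulates as a u32, so the
wrapping addition is now spelled out with a cast. Each candidate target is the table entry, plus
the current offset for relative branches, plus the table bias, plus 8 (presumably because Maxwell
branch targets are taken relative to the instruction after the 8-byte BRX). A hedged sketch of
that computation; the names and the +8 rationale are assumptions, not the real headers:

    #include <cstdint>

    std::uint32_t BrxTarget(std::uint32_t entry, std::uint32_t pc_offset,
                            std::int32_t table_bias, bool is_absolute) {
        std::uint32_t target{entry};
        if (!is_absolute) {
            target += pc_offset; // relative targets start from the current PC
        }
        target += static_cast<std::uint32_t>(table_bias); // explicit wrapping add
        target += 8;                                      // skip the 8-byte BRX itself
        return target;
    }
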
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
index bd85afa1e5..932d19c1d4 100644
--- a/src/shader_recompiler/frontend/maxwell/decode.cpp
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
             bit >>= 1;
         }
     }
-    return MaskValue{.mask{mask}, .value{value}};
+    return MaskValue{.mask = mask, .value = value};
 }
 
 struct InstEncoding {
@@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{
 #define INST(name, cute, encode)                                                                   \
     InstEncoding{                                                                                  \
         .mask_value{MaskValueFromEncoding(encode)},                                                \
-        .opcode{Opcode::name},                                                                     \
+        .opcode = Opcode::name,                                                                    \
     },
 #include "maxwell.inc"
 #undef INST
@@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) {
         const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
         if ((index & mask) == value) {
             encodings.at(element) = InstInfo{
-                .high_mask{static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT)},
-                .high_value{static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT)},
-                .opcode{encoding.opcode},
+                .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
+                .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
+                .opcode = encoding.opcode,
             };
             ++element;
         }
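
For context, MaskValueFromEncoding turns each INST encoding string into a (mask, value) pair: a
literal '0' or '1' pins that bit in the mask and records its value, 'x' leaves the bit free, and
spaces are skipped. An instruction word then matches when (insn & mask) == value, which is what
the fast lookup table above precomputes per bucket of high bits. A stand-alone sketch of the
matching idea, under those assumptions:

    #include <cstdint>

    struct MaskValue {
        std::uint64_t mask;  // set where the encoding pins a bit
        std::uint64_t value; // the pinned bits' values, zero elsewhere
    };

    // Illustrative restatement of the encoding-string parse, not the real function.
    constexpr MaskValue FromEncoding(const char* encoding) {
        std::uint64_t mask{};
        std::uint64_t value{};
        std::uint64_t bit{1ULL << 63};
        for (const char* c{encoding}; *c != '\0'; ++c) {
            if (*c == ' ') {
                continue; // spaces only group bits for readability
            }
            if (*c == '0' || *c == '1') {
                mask |= bit;
                if (*c == '1') {
                    value |= bit;
                }
            }
            bit >>= 1;
        }
        return MaskValue{.mask = mask, .value = value};
    }

    constexpr bool Matches(std::uint64_t insn, MaskValue mv) {
        return (insn & mv.mask) == mv.value;
    }
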
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
index 96453509d5..008625cb37 100644
--- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -97,11 +97,11 @@ std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env
     }
     const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
     return IndirectBranchTableInfo{
-        .cbuf_index{cbuf_index},
-        .cbuf_offset{cbuf_offset},
-        .num_entries{imnmx_immediate + 1},
-        .branch_offset{brx_offset},
-        .branch_reg{brx_reg},
+        .cbuf_index = cbuf_index,
+        .cbuf_offset = cbuf_offset,
+        .num_entries = imnmx_immediate + 1,
+        .branch_offset = brx_offset,
+        .branch_reg = brx_reg,
     };
 }
 
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index c804c2a8e9..02cef26455 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -558,7 +558,6 @@ private:
         const Node label{goto_stmt->label};
         const u32 label_id{label->id};
         const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)};
-        const auto type{label_nested_stmt->type};
 
         Tree loop_body;
         loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
@@ -566,7 +565,7 @@ private:
         Statement* const variable{pool.Create(Variable{}, label_id)};
         Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
         UpdateTreeUp(loop_stmt);
-        const Node loop_node{body.insert(goto_stmt, *loop_stmt)};
+        body.insert(goto_stmt, *loop_stmt);
 
         Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
         loop_stmt->children.push_front(*new_goto);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
index ac1433dea7..5a1b3a8fcb 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
     const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
 
     const IR::FpControl control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(dadd.fp_rounding)},
-        .fmz_mode{IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = CastFpRounding(dadd.fp_rounding),
+        .fmz_mode = IR::FmzMode::None,
     };
 
     v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
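
This file and the translator files below all make the same mechanical change: designated
initializers move from the brace form .member{value} to the assignment form .member = value.
Both are valid C++20, so the switch is presumably working around a compiler quirk of the era
(the brace form reportedly tripped some GCC releases) while also standardizing style. A
compilable sketch of the two spellings, using an invented FpControl-like aggregate:

    enum class FmzMode { None, FTZ };
    enum class FpRounding { RN, DontCare };

    struct FpControl {
        bool no_contraction{false};
        FpRounding rounding{FpRounding::DontCare};
        FmzMode fmz_mode{FmzMode::None};
    };

    constexpr FpControl control{
        .no_contraction = true,       // the form this patch standardizes on
        .rounding = FpRounding::RN,
        .fmz_mode = FmzMode::None,
        // .fmz_mode{FmzMode::None},  // equally valid C++20, but avoided here
    };
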
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
index ff73218629..7238414962 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s
     const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
 
     const IR::FpControl control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(dfma.fp_rounding)},
-        .fmz_mode{IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = CastFpRounding(dfma.fp_rounding),
+        .fmz_mode = IR::FmzMode::None,
     };
 
     v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
index 3e83d1c95c..4a49299a0b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
 
     const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
     const IR::FpControl control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(dmul.fp_rounding)},
-        .fmz_mode{IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = CastFpRounding(dmul.fp_rounding),
+        .fmz_mode = IR::FmzMode::None,
     };
 
     v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
index b39950c849..b8c89810cb 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin
     const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
     const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
     IR::FpControl control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(fp_rounding)},
-        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = CastFpRounding(fp_rounding),
+        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
     IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
     if (sat) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
index c02a40209e..80109ca0e5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o
     } const fcmp{insn};
 
     const IR::F32 zero{v.ir.Imm32(0.0f)};
-    const IR::F32 neg_zero{v.ir.Imm32(-0.0f)};
-    const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}};
+    const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
     const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
     const IR::U32 src_reg{v.X(fcmp.src_reg)};
     const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
index c5417775e1..b9f4ee0d9b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
     const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
     const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     IR::U1 pred{v.ir.GetPred(fset.pred)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
index 1e366fde03..035f8782a7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
 
     const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
     IR::FpControl fp_control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
     if (f2f.src_size != f2f.dst_size) {
         fp_control.rounding = CastFpRounding(f2f.rounding);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
index 21ae92be1e..cf3cf1ba69 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
         fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
     }
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{fmz_mode},
+        .no_contraction = true,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = fmz_mode,
     };
     const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
     const IR::F16F32F64 rounded_value{[&] {
@@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
         } else if (f2i.dest_format == DestFormat::I64) {
             handled_special_case = true;
             result = IR::U64{
-                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)};
+                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
         }
     }
     if (!handled_special_case && is_signed) {
         if (bitsize != 64) {
             result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
         } else {
-            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)};
+            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)};
         }
     }
 
@@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
 
 void TranslatorVisitor::F2I_reg(u64 insn) {
     union {
+        u64 raw;
         F2I base;
         BitField<20, 8, IR::Reg> src_reg;
     } const f2i{insn};
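
The ULL to UL changes in this file read as overload-resolution fixes: on an LP64 platform u64 is
typically unsigned long, so passing 0ULL (an unsigned long long) to an emitter that has both u64
and s64 overloads needs a conversion either way and is ambiguous, while 0UL matches the u64
overload exactly. A minimal reproduction under those assumptions; the overload set is
hypothetical, loosely mirroring IREmitter::Imm64:

    #include <cstdint>

    using u64 = std::uint64_t; // unsigned long on typical LP64 Linux
    using s64 = std::int64_t;  // long

    struct Emitter {
        void Imm64(u64) {}
        void Imm64(s64) {}
    };

    void Demo(Emitter& ir) {
        // ir.Imm64(0ULL); // ambiguous on LP64: both overloads need a conversion
        ir.Imm64(0UL);     // exact match for u64, so it resolves cleanly
    }
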
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
index 18561bc9c7..fa2a7807b7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s
     const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
     const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(fp_rounding)},
-        .fmz_mode{CastFmzMode(fmz_mode)},
+        .no_contraction = true,
+        .rounding = CastFpRounding(fp_rounding),
+        .fmz_mode = CastFmzMode(fmz_mode),
     };
     IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
     if (fmz_mode == FmzMode::FMZ && !sat) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
index 343d91032b..8ae4375287 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
     const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
 
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
     IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
     IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
index 72f0a18ae8..06226b7ce2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode
     }
     const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(fp_rounding)},
-        .fmz_mode{CastFmzMode(fmz_mode)},
+        .no_contraction = true,
+        .rounding = CastFpRounding(fp_rounding),
+        .fmz_mode = CastFmzMode(fmz_mode),
     };
     IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
     if (fmz_mode == FmzMode::FMZ && !sat) {
@@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) {
          fmul32i.sat != 0, fmul32i.cc != 0, false);
 }
 
-} // namespace Shader::Maxwell
\ No newline at end of file
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
index 8ff9db8438..5f93a15130 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
     const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
     const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     const BooleanOp bop{fsetp.bop};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
index e42921a216..7550a8d4c4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) {
     const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
 
     const IR::FpControl fp_control{
-        .no_contraction{false},
-        .rounding{CastFpRounding(fswzadd.round)},
-        .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = CastFpRounding(fswzadd.round),
+        .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
index 03e7bf047d..f2738a93b2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool
     rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
 
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
     IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
     IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
@@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) {
         BitField<20, 9, u64> low;
     } const hadd2{insn};
 
-    const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{
+        static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
+        static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
     HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
 }
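
The reworked immediate packing here (and in HFMA2, HMUL2, HSET2 and HSETP2 below) only adds
casts: the BitField reads promote through wider types when shifted, and shifting the int-typed
(neg != 0 ? 1 : 0) term into bit 31 is at best implementation-defined, so every term is cast to
u32 before being OR-ed together. The layout itself is unchanged: nine payload bits per half
starting at bits 6 and 22, with sign bits at 15 and 31. A worked sketch of that packing (helper
name invented, layout per the code above):

    #include <cstdint>

    // Pack two 9-bit half-precision payloads plus sign bits into one u32 immediate.
    constexpr std::uint32_t PackHalf2(std::uint32_t low9, bool neg_low,
                                      std::uint32_t high9, bool neg_high) {
        return (low9 << 6) | (neg_low ? 1u << 15 : 0u) |
               (high9 << 22) | (neg_high ? 1u << 31 : 0u);
    }

    // Low half only: payload 0x1FF lands at bits [14:6], sign bit 15 is set.
    static_assert(PackHalf2(0x1FF, true, 0x000, false) == 0x0000FFC0u);
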
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
index 8b234bd6ae..fd79867016 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool
     rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
 
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{HalfPrecision2FmzMode(precision)},
+        .no_contraction = true,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = HalfPrecision2FmzMode(precision),
     };
     IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
     IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
@@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) {
         BitField<57, 2, HalfPrecision> precision;
     } const hfma2{insn};
 
-    const u32 imm{static_cast<u32>(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{
+        static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
+        static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
 
     HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
           GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
index 2451a6ef68..3f548ce761 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo
     rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
 
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{HalfPrecision2FmzMode(precision)},
+        .no_contraction = true,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = HalfPrecision2FmzMode(precision),
     };
     IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
     IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
@@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) {
         BitField<44, 1, u64> abs_a;
     } const hmul2{insn};
 
-    const u32 imm{static_cast<u32>(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{
+        static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
+        static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
     HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
           Swizzle::H1_H0, ir.Imm32(imm));
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
index 7f1f4b88c8..cca5b831fd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f
     rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
 
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     IR::U1 pred{v.ir.GetPred(hset2.pred)};
@@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) {
         BitField<20, 9, u64> low;
     } const hset2{insn};
 
-    const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{
+        static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
+        static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
 
     HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
           Swizzle::H1_H0);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
index 3e2a23c92d..b3931dae32 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo
     rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
 
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
@@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) {
         BitField<20, 9, u64> low;
     } const hsetp2{insn};
 
-    const u32 imm{static_cast<u32>(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{static_cast<u32>(hsetp2.low << 6) |
+                  static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
+                  static_cast<u32>(hsetp2.high << 22) |
+                  static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
 
     HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
            hsetp2.h_and != 0);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 30b570ce4d..88bbac0a50 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
     }
     const IR::Value result{ir.UnpackUint2x32(value)};
     for (int i = 0; i < 2; i++) {
-        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
+        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
     }
 }
 
@@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
     }
     const IR::Value result{ir.UnpackDouble2x32(value)};
     for (int i = 0; i < 2; i++) {
-        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
+        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
     }
 }
 
@@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
     const auto [binding, offset_value]{CbufAddr(insn)};
     const bool unaligned{cbuf.unaligned != 0};
     const u32 offset{offset_value.U32()};
-    const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4};
+    const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
 
     const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
     const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
@@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
         BitField<20, 19, u64> value;
         BitField<56, 1, u64> is_negative;
     } const imm{insn};
-    const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0};
+    const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
     const u32 value{static_cast<u32>(imm.value) << 12};
     return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
 }
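
In GetDoubleCbuf, the new u suffixes keep the address math unsigned. Under a plain reading, the
expression selects the upper 32-bit word of an 8-byte constant: the aligned path first rounds the
offset down to an 8-byte boundary with offset & ~7u, then | 4u picks the high word, while the
unaligned path just forces bit 2. A small worked example of that arithmetic:

    #include <cassert>

    // Illustrative helper mirroring the addr expression in GetDoubleCbuf.
    unsigned HighWordAddr(unsigned offset, bool unaligned) {
        return unaligned ? (offset | 4u) : ((offset & ~7u) | 4u);
    }

    int main() {
        assert(HighWordAddr(0x20, false) == 0x24); // round down to 0x20, high word at +4
        assert(HighWordAddr(0x26, false) == 0x24); // stray low bits are masked first
        assert(HighWordAddr(0x26, true) == 0x26);  // unaligned: only force bit 2
    }
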
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
index 1493e18151..8ffd84867d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
     } const iadd{insn};
 
     const bool po{iadd.three_for_po == 3};
-    const bool neg_a{!po && iadd.neg_a != 0};
     if (!po && iadd.neg_b != 0) {
         op_b = v.ir.INeg(op_b);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
index e8b5ae1d2d..5a0fc36a03 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
         }
         const IR::Value vector{v.ir.UnpackDouble2x32(value)};
         for (int i = 0; i < 2; ++i) {
-            v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)});
+            v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
         }
         break;
     }
@@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) {
     }
 }
 
-} // namespace Shader::Maxwell
\ No newline at end of file
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
index ae3ecea325..2300088e38 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -50,7 +50,7 @@ void TranslatorVisitor::LDC(u64 insn) {
         }
         const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
         for (int i = 0; i < 2; ++i) {
-            X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
+            X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
         }
         break;
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
index 68963c8ea6..e24b497210 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -40,7 +40,6 @@ std::pair<int, bool> GetSize(u64 insn) {
         BitField<48, 3, Size> size;
     } const encoding{insn};
 
-    const Size nnn = encoding.size;
     switch (encoding.size) {
     case Size::U8:
         return {8, false};
@@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) {
     case 32:
     case 64:
     case 128:
-        if (!IR::IsAligned(dest, bit_size / 32)) {
+        if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
             throw NotImplementedException("Unaligned destination register {}", dest);
         }
         X(dest, ir.LoadLocal(word_offset));
@@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) {
         break;
     case 64:
     case 128:
-        if (!IR::IsAligned(dest, bit_size / 32)) {
+        if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
             throw NotImplementedException("Unaligned destination register {}", dest);
         }
         for (int element = 0; element < bit_size / 32; ++element) {
-            X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
+            X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
         }
         break;
     }
@@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) {
     case 32:
     case 64:
     case 128:
-        if (!IR::IsAligned(reg, bit_size / 32)) {
+        if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
             throw NotImplementedException("Unaligned source register");
         }
         ir.WriteLocal(word_offset, src);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index 71688b1d78..36c5cff2f1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) {
         }
         const IR::Value vector{ir.LoadGlobal64(address)};
         for (int i = 0; i < 2; ++i) {
-            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
+            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
         }
         break;
     }
@@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) {
         }
         const IR::Value vector{ir.LoadGlobal128(address)};
         for (int i = 0; i < 4; ++i) {
-            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
+            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
         }
         break;
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
index b2da079f9c..95d4165863 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
         if (tex.dc != 0) {
             value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
         } else {
-            value = IR::F32{v.ir.CompositeExtract(sample, element)};
+            value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
         }
         v.F(dest_reg, value);
         ++dest_reg;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
index d5fda20f42..fe2c7db85d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{
     R | G | B | A, //
 };
 
-void CheckAlignment(IR::Reg reg, int alignment) {
+void CheckAlignment(IR::Reg reg, size_t alignment) {
     if (!IR::IsAligned(reg, alignment)) {
         throw NotImplementedException("Unaligned source register {}", reg);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
index beab515ad9..2ba9c1018a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -37,7 +37,7 @@ union Encoding {
     BitField<36, 13, u64> cbuf_offset;
 };
 
-void CheckAlignment(IR::Reg reg, int alignment) {
+void CheckAlignment(IR::Reg reg, size_t alignment) {
     if (!IR::IsAligned(reg, alignment)) {
         throw NotImplementedException("Unaligned source register {}", reg);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
index 623b8fc23b..0863bdfcd4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -56,7 +56,7 @@ union Encoding {
     BitField<53, 4, u64> encoding;
 };
 
-void CheckAlignment(IR::Reg reg, int alignment) {
+void CheckAlignment(IR::Reg reg, size_t alignment) {
     if (!IR::IsAligned(reg, alignment)) {
         throw NotImplementedException("Unaligned source register {}", reg);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
index 8c7e04bcab..0459e5473e 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
         if (((txq.mask >> element) & 1) == 0) {
             continue;
         }
-        v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)});
+        v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
         ++dest_reg;
     }
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
index af13b3fccf..ec5e74f6d8 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) {
     const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
 
     const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
-    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
     const VideoWidth a_width{vsetp.src_a_width};
     const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
 
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 1c03ee82af..edbfcd3082 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -6,6 +6,7 @@
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 #include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/ir_opt/passes.h"
 #include "shader_recompiler/shader_info.h"
 
 namespace Shader::Optimization {
@@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
     auto& cbufs{info.constant_buffer_descriptors};
     cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
                  ConstantBufferDescriptor{
-                     .index{index},
-                     .count{1},
+                     .index = index,
+                     .count = 1,
                  });
 }
 
@@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) {
 }
 
 void VisitUsages(Info& info, IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::CompositeConstructF16x2:
     case IR::Opcode::CompositeConstructF16x3:
     case IR::Opcode::CompositeConstructF16x4:
@@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     default:
         break;
     }
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::GetCbufU8:
     case IR::Opcode::GetCbufS8:
     case IR::Opcode::UndefU8:
@@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     default:
         break;
     }
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::GetCbufU16:
     case IR::Opcode::GetCbufS16:
     case IR::Opcode::UndefU16:
@@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     default:
         break;
     }
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::UndefU64:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS8:
@@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     default:
         break;
     }
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::DemoteToHelperInvocation:
         info.uses_demote_to_helper_invocation = true;
         break;
@@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
         } else {
             throw NotImplementedException("Constant buffer with non-immediate index");
         }
-        switch (inst.Opcode()) {
+        switch (inst.GetOpcode()) {
         case IR::Opcode::GetCbufU8:
         case IR::Opcode::GetCbufS8:
             info.used_constant_buffer_types |= IR::Type::U8;
@@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
 }
 
 void VisitFpModifiers(Info& info, IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::FPAdd16:
     case IR::Opcode::FPFma16:
     case IR::Opcode::FPMul16:
@@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
         info.stores_position |= header.vtg.omap_systemb.position != 0;
     }
 }
-
 } // Anonymous namespace
 
 void CollectShaderInfoPass(Environment& env, IR::Program& program) {
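
The blanket inst.Opcode() to inst.GetOpcode() rename that starts in this file is most plausibly
a name-lookup fix rather than a style choice: once Inst declares a member function named Opcode,
that name inside the class refers to the function and shadows the IR::Opcode enum, which breaks
later uses of the type name. A stripped-down illustration of the clash (hypothetical types, not
the real headers):

    enum class Opcode { Nop, Add };

    struct Inst {
        // Had the accessor been spelled `Opcode()`, the function name would
        // shadow the enum inside this class:
        //   Opcode Opcode() const;  // from here on, `Opcode` means the function
        //   Opcode op;              // error: `Opcode` no longer names the type
        Opcode GetOpcode() const {  // renamed accessor keeps the type name usable
            return op;
        }
        Opcode op{Opcode::Nop};
    };
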
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 1720d7a092..61fbbe04cb 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
     }
     if (is_lhs_immediate && !is_rhs_immediate) {
         IR::Inst* const rhs_inst{rhs.InstRecursive()};
-        if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
+        if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
             const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
             inst.SetArg(0, rhs_inst->Arg(0));
             inst.SetArg(1, IR::Value{combined});
@@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
     }
     if (!is_lhs_immediate && is_rhs_immediate) {
         const IR::Inst* const lhs_inst{lhs.InstRecursive()};
-        if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
+        if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
             const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
             inst.SetArg(0, lhs_inst->Arg(0));
             inst.SetArg(1, IR::Value{combined});
@@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
         return false;
     }
     IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
-    if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) {
+    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+        lhs_shl->Arg(1) != IR::Value{16U}) {
         return false;
     }
     if (lhs_shl->Arg(0).IsImmediate()) {
@@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
     }
     IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
     IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
-    if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) {
+    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
         return false;
     }
     if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
@@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
     }
     IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
     IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
-    if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) {
+    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
         return false;
     }
-    if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) {
+    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
         return false;
     }
     if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
@@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) {
     // ISub32 is generally used to subtract two constant buffer reads; compare them and replace
     // the result with zero if they are equal.
     const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
-        return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 &&
-               a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
+        return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
+               b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
+               a->Arg(1) == b->Arg(1);
     }};
     IR::Inst* op_a{inst.Arg(0).InstRecursive()};
     IR::Inst* op_b{inst.Arg(1).InstRecursive()};
@@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) {
         return;
     }
     // It's also possible a value is being added to a cbuf and then subtracted
-    if (op_b->Opcode() == IR::Opcode::IAdd32) {
+    if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
         // Canonicalize local variables to simplify the following logic
         std::swap(op_a, op_b);
     }
-    if (op_b->Opcode() != IR::Opcode::GetCbufU32) {
+    if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
         return;
     }
     IR::Inst* const inst_cbuf{op_b};
-    if (op_a->Opcode() != IR::Opcode::IAdd32) {
+    if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
         return;
     }
     IR::Value add_op_a{op_a->Arg(0)};
@@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) {
     }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
     IR::Inst* const rhs_op{rhs_value.InstRecursive()};
-    if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) {
+    if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
+        rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
         return;
     }
     const IR::Value recip_source{rhs_op->Arg(0)};
@@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) {
     }
     IR::Inst* const attr_a{recip_source.InstRecursive()};
     IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
-    if (attr_a->Opcode() != IR::Opcode::GetAttribute ||
-        attr_b->Opcode() != IR::Opcode::GetAttribute) {
+    if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
+        attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
         return;
     }
     if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
@@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) {
         return;
     }
     IR::Inst* const arg{value.InstRecursive()};
-    if (arg->Opcode() == IR::Opcode::LogicalNot) {
+    if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
         inst.ReplaceUsesWith(arg->Arg(0));
     }
 }
@@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
         return;
     }
     IR::Inst* const arg_inst{value.InstRecursive()};
-    if (arg_inst->Opcode() == reverse) {
+    if (arg_inst->GetOpcode() == reverse) {
         inst.ReplaceUsesWith(arg_inst->Arg(0));
         return;
     }
     if constexpr (op == IR::Opcode::BitCastF32U32) {
-        if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) {
+        if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
             // Replace the bitcast with a typed constant buffer read
             inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
             inst.SetArg(0, arg_inst->Arg(0));
@@ -338,7 +341,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
         return;
     }
     IR::Inst* const arg_inst{value.InstRecursive()};
-    if (arg_inst->Opcode() == reverse) {
+    if (arg_inst->GetOpcode() == reverse) {
         inst.ReplaceUsesWith(arg_inst->Arg(0));
         return;
     }
@@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
 template <typename Func, size_t... I>
 IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
     using Traits = LambdaTraits<decltype(func)>;
-    return IR::Value{func(Arg<Traits::ArgType<I>>(inst.Arg(I))...)};
+    return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
 }
 
 void FoldBranchConditional(IR::Inst& inst) {
@@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) {
         return;
     }
     const IR::Inst* cond_inst{cond.InstRecursive()};
-    if (cond_inst->Opcode() == IR::Opcode::LogicalNot) {
+    if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) {
         const IR::Value true_label{inst.Arg(1)};
         const IR::Value false_label{inst.Arg(2)};
         // Remove negation on the conditional (take the parameter out of LogicalNot) and swap
@@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) {
 std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
                                                   IR::Opcode construct, u32 first_index) {
     IR::Inst* const inst{inst_value.InstRecursive()};
-    if (inst->Opcode() == construct) {
+    if (inst->GetOpcode() == construct) {
         return inst->Arg(first_index);
     }
-    if (inst->Opcode() != insert) {
+    if (inst->GetOpcode() != insert) {
         return std::nullopt;
     }
     IR::Value value_index{inst->Arg(2)};
@@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser
 }
 
 void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::GetRegister:
         return FoldGetRegister(inst);
     case IR::Opcode::GetPred:
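
The EvalImmediates hunk in this file is a dependent-name fix: the alias Traits names
LambdaTraits<decltype(func)>, a type that depends on the template parameter Func, so ArgType must
be announced as a member template with the template keyword and the whole expression as a type
with typename. MSVC tolerates the short spelling; GCC and Clang enforce the standard one. A
stripped-down illustration:

    #include <cstddef>

    template <typename T>
    struct LambdaTraits {
        template <std::size_t I>
        using ArgType = T; // stand-in; the real trait extracts the I-th parameter type
    };

    template <typename Func, std::size_t I>
    auto PickArg() {
        // Both keywords are required because LambdaTraits<Func> is a dependent
        // type: without them the compiler cannot know ArgType names a type.
        using Arg = typename LambdaTraits<Func>::template ArgType<I>;
        return Arg{};
    }
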
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 0858a0bddd..90a65dd167 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -57,7 +57,7 @@ struct StorageInfo {
 
 /// Returns true when the instruction is a global memory instruction
 bool IsGlobalMemory(const IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::LoadGlobalS8:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS16:
@@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) {
 
 /// Returns true when the instruction is a global memory instruction
 bool IsGlobalMemoryWrite(const IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::WriteGlobalS8:
     case IR::Opcode::WriteGlobalU8:
     case IR::Opcode::WriteGlobalS16:
@@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
 void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     const IR::Value zero{u32{0}};
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::LoadGlobalS8:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS16:
@@ -164,7 +164,8 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
         inst.Invalidate();
         break;
     default:
-        throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode());
+        throw LogicError("Invalid opcode to discard its global memory operation {}",
+                         inst.GetOpcode());
     }
 }
 
@@ -184,7 +184,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
     // This address is expected to either be a PackUint2x32 or a IAdd64
     IR::Inst* addr_inst{addr.InstRecursive()};
     s32 imm_offset{0};
-    if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
+    if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
         // If it's an IAdd64, get the immediate offset it is applying and grab the address
         // instruction. This expects for the instruction to be canonicalized having the address on
         // the first argument and the immediate offset on the second one.
@@ -200,7 +200,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
         addr_inst = iadd_addr.Inst();
     }
     // With IAdd64 handled, now PackUint2x32 is expected without exceptions
-    if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
+    if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) {
         return std::nullopt;
     }
     // PackUint2x32 is expected to be generated from a vector
@@ -210,20 +210,20 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
     }
     // This vector is expected to be a CompositeConstructU32x2
     IR::Inst* const vector_inst{vector.InstRecursive()};
-    if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) {
+    if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
         return std::nullopt;
     }
     // Grab the first argument from the CompositeConstructU32x2; this is the low address.
     return LowAddrInfo{
         .value{IR::U32{vector_inst->Arg(0)}},
-        .imm_offset{imm_offset},
+        .imm_offset = imm_offset,
     };
 }
 
 /// Tries to track the storage buffer address used by a global memory instruction
 std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
     const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
-        if (inst->Opcode() != IR::Opcode::GetCbufU32) {
+        if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
             return std::nullopt;
         }
         const IR::Value index{inst->Arg(0)};
@@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // NVN puts storage buffers in a specific range; we have to bias towards these addresses
     // to avoid false positives
     static constexpr Bias nvn_bias{
-        .index{0},
-        .offset_begin{0x110},
-        .offset_end{0x610},
+        .index = 0,
+        .offset_begin = 0x110,
+        .offset_end = 0x610,
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     info.set.insert(*storage_buffer);
     info.to_replace.push_back(StorageInst{
         .storage_buffer{*storage_buffer},
-        .inst{&inst},
-        .block{&block},
+        .inst = &inst,
+        .block = &block,
     });
 }
 
@@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
 /// Replace a global memory load instruction with its storage buffer equivalent
 void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                  const IR::U32& offset) {
-    const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())};
+    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
     const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
     const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
     inst.ReplaceUsesWith(value);
@@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
 /// Replace a global memory write instruction with its storage buffer equivalent
 void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                   const IR::U32& offset) {
-    const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())};
+    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
     const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
     block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
     inst.Invalidate();
@@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
 /// Replace a global memory instruction with its storage buffer equivalent
 void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
              const IR::U32& offset) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::LoadGlobalS8:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS16:
@@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
     case IR::Opcode::WriteGlobal128:
         return ReplaceWrite(block, inst, storage_index, offset);
     default:
-        throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode());
+        throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
     }
 }
 } // Anonymous namespace
@@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
     u32 storage_index{};
     for (const StorageBufferAddr& storage_buffer : info.set) {
         program.info.storage_buffers_descriptors.push_back({
-            .cbuf_index{storage_buffer.index},
-            .cbuf_offset{storage_buffer.offset},
-            .count{1},
+            .cbuf_index = storage_buffer.index,
+            .cbuf_offset = storage_buffer.offset,
+            .count = 1,
             .is_written{info.writes.contains(storage_buffer)},
         });
         ++storage_index;
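
For orientation, the pass above recognizes a storage buffer by walking a global memory
address back to a GetCbufU32 read and checking the constant-buffer slot against the NVN
window; addresses that fail the check stay as (discarded) global accesses. A minimal
sketch of the acceptance test, with a simplified type in place of StorageBufferAddr:

    struct CandidateAddr {
        unsigned cbuf_index;
        unsigned cbuf_offset;
    };

    // Accept only descriptors inside the window NVN reserves for storage
    // buffers, mirroring nvn_bias above: index 0, offsets [0x110, 0x610).
    bool LooksLikeStorageBuffer(const CandidateAddr& addr) {
        return addr.cbuf_index == 0 && addr.cbuf_offset >= 0x110 &&
               addr.cbuf_offset < 0x610;
    }
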
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 8790b48f21..38af72dfea 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) {
                     inst->SetArg(i, arg.Inst()->Arg(0));
                 }
             }
-            if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+            if (inst->GetOpcode() == IR::Opcode::Identity ||
+                inst->GetOpcode() == IR::Opcode::Void) {
                 to_invalidate.push_back(&*inst);
                 inst = block->Instructions().erase(inst);
             } else {
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 0d2c91ed61..52576b07fc 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) {
 void LowerFp16ToFp32(IR::Program& program) {
     for (IR::Block* const block : program.blocks) {
         for (IR::Inst& inst : block->Instructions()) {
-            inst.ReplaceOpcode(Replace(inst.Opcode()));
+            inst.ReplaceOpcode(Replace(inst.GetOpcode()));
         }
     }
 }
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index ca36253d14..346fcc3774 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
 }
 
 [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
-    return inst.Opcode() == IR::Opcode::Phi;
+    return inst.GetOpcode() == IR::Opcode::Phi;
 }
 
 enum class Status {
@@ -278,7 +278,7 @@ private:
 };
 
 void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::SetRegister:
         if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
             pass.WriteVariable(reg, block, inst.Arg(1));
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 290ce41791..c8aee3d3d5 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -30,7 +30,7 @@ struct TextureInst {
 using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
 
 IR::Opcode IndexedInstruction(const IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::BindlessImageSampleImplicitLod:
     case IR::Opcode::BoundImageSampleImplicitLod:
         return IR::Opcode::ImageSampleImplicitLod;
@@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) {
 }
 
 bool IsBindless(const IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::BindlessImageSampleImplicitLod:
     case IR::Opcode::BindlessImageSampleExplicitLod:
     case IR::Opcode::BindlessImageSampleDrefImplicitLod:
@@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) {
     case IR::Opcode::BoundImageGradient:
         return false;
     default:
-        throw InvalidArgument("Invalid opcode {}", inst.Opcode());
+        throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
     }
 }
 
@@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) {
 }
 
 std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
-    if (inst->Opcode() != IR::Opcode::GetCbufU32) {
+    if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
         return std::nullopt;
     }
     const IR::Value index{inst->Arg(0)};
@@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
         addr = *track_addr;
     } else {
         addr = ConstBufferAddr{
-            .index{env.TextureBoundBuffer()},
-            .offset{inst.Arg(0).U32()},
+            .index = env.TextureBoundBuffer(),
+            .offset = inst.Arg(0).U32(),
         };
     }
     return TextureInst{
         .cbuf{addr},
-        .inst{&inst},
-        .block{block},
+        .inst = &inst,
+        .block = block,
     };
 }
 
@@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) {
 
         const auto& cbuf{texture_inst.cbuf};
         auto flags{inst->Flags<IR::TextureInstInfo>()};
-        switch (inst->Opcode()) {
+        switch (inst->GetOpcode()) {
         case IR::Opcode::ImageQueryDimensions:
             flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset));
             inst->SetFlags(flags);
@@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) {
         u32 index;
         if (flags.type == TextureType::Buffer) {
             index = descriptors.Add(TextureBufferDescriptor{
-                .cbuf_index{cbuf.index},
-                .cbuf_offset{cbuf.offset},
-                .count{1},
+                .cbuf_index = cbuf.index,
+                .cbuf_offset = cbuf.offset,
+                .count = 1,
             });
         } else {
             index = descriptors.Add(TextureDescriptor{
-                .type{flags.type},
-                .cbuf_index{cbuf.index},
-                .cbuf_offset{cbuf.offset},
-                .count{1},
+                .type = flags.type,
+                .cbuf_index = cbuf.index,
+                .cbuf_offset = cbuf.offset,
+                .count = 1,
             });
         }
         inst->SetArg(0, IR::Value{index});
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 4080b37cca..dbec96d84a 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -14,14 +14,14 @@ namespace Shader::Optimization {
 static void ValidateTypes(const IR::Program& program) {
     for (const auto& block : program.blocks) {
         for (const IR::Inst& inst : *block) {
-            if (inst.Opcode() == IR::Opcode::Phi) {
+            if (inst.GetOpcode() == IR::Opcode::Phi) {
                 // Skip validation on phi nodes
                 continue;
             }
             const size_t num_args{inst.NumArgs()};
             for (size_t i = 0; i < num_args; ++i) {
                 const IR::Type t1{inst.Arg(i).Type()};
-                const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
+                const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
                 if (!IR::AreTypesCompatible(t1, t2)) {
                     throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
                 }
diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp
index ac9912738a..aa6e865934 100644
--- a/src/tests/common/unique_function.cpp
+++ b/src/tests/common/unique_function.cpp
@@ -17,10 +17,12 @@ struct Noisy {
     Noisy& operator=(Noisy&& rhs) noexcept {
         state = "Move assigned";
         rhs.state = "Moved away";
+        return *this;
     }
     Noisy(const Noisy&) : state{"Copied constructed"} {}
     Noisy& operator=(const Noisy&) {
         state = "Copied assigned";
+        return *this;
     }
 
     std::string state;
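
The unique_function fix above supplies the return statements the move and copy
assignment operators were missing; flowing off the end of a non-void function is
undefined behavior, and most compilers only warn about it. A standalone illustration
with a hypothetical type:

    #include <string>

    struct Tracked {
        std::string state;

        Tracked& operator=(Tracked&& rhs) noexcept {
            state = "Move assigned";
            rhs.state = "Moved away";
            return *this; // Required: omitting it is undefined behavior.
        }
    };
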
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 71b07c1940..3166a69dc1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -203,7 +203,7 @@ add_library(video_core STATIC
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak)
+target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak)
 
 if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
     add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 893258b4aa..57e2d569c2 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
         .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
         .pDynamicStates = dynamic_states.data(),
     };
-    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+    [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
         .pNext = nullptr,
         .requiredSubgroupSize = GuestWarpSize,
@@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
         if (!spv_modules[stage]) {
             continue;
         }
-        [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
-            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-            .pNext = nullptr,
-            .flags = 0,
-            .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
-            .module = *spv_modules[stage],
-            .pName = "main",
-            .pSpecializationInfo = nullptr,
-        });
+        [[maybe_unused]] auto& stage_ci =
+            shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
+                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
+                .module = *spv_modules[stage],
+                .pName = "main",
+                .pSpecializationInfo = nullptr,
+            });
         /*
         if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
             stage_ci.pNext = &subgroup_size_ci;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 23bf84a92f..fcebb8f6e2 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -47,7 +47,7 @@ auto MakeSpan(Container& container) {
     return std::span(container.data(), container.size());
 }
 
-u64 MakeCbufKey(u32 index, u32 offset) {
+static u64 MakeCbufKey(u32 index, u32 offset) {
     return (static_cast<u64>(index) << 32) | offset;
 }
 
@@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
         .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
         .generic_input_types{},
+        .fixed_state_point_size{},
     };
 }
 
@@ -748,7 +749,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         Shader::Environment& env{*envs[env_index]};
         ++env_index;
 
-        const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)};
+        const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
         programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
     }
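
The cfg_offset change above is a narrowing repair: assuming env.StartAddress() returns
a 64-bit value, the sum with sizeof(Shader::ProgramHeader) is 64-bit, and brace
initialization of a u32 rejects the implicit narrowing conversion at compile time. The
rule in isolation:

    #include <cstdint>

    void Example(std::uint64_t start_address) {
        // std::uint32_t bad{start_address + 8};  // ill-formed: narrowing inside {}
        const std::uint32_t ok{static_cast<std::uint32_t>(start_address + 8)};
        (void)ok; // silence unused-variable warnings in this sketch
    }
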
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
index b2dcd74ab9..991afe521e 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#pragma once
-
 #include <unordered_map>
 
 #include <boost/container/static_vector.hpp>
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index e42b091c5f..70328680dd 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     };
 }
 
-[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
+[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
     std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
     std::vector<VkBufferCopy> result(copies.size());
     std::ranges::transform(
-- 
cgit v1.2.3-70-g09d2
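
Two mechanical changes make up most of the patch above. Renaming the accessor from
Opcode() to GetOpcode() plausibly sidesteps a C++ name-lookup wrinkle: inside a class,
a member function named Opcode hides the enum type of the same name, forcing fully
qualified spellings. The initializer change from .count{1} to .count = 1 keeps the
aggregates' values identical; only the designated-initializer syntax differs. A minimal
sketch of the hiding problem, with illustrative types rather than the real IR:

    namespace IR {
    enum class Opcode { Identity, Void };

    struct Inst {
        // If this member were spelled Opcode(), the unqualified name Opcode
        // inside the class would refer to the function, and declarations such
        // as "Opcode op;" would need the qualified form "IR::Opcode op;".
        Opcode GetOpcode() const noexcept { return op; }
        Opcode op{};
    };
    } // namespace IR
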


From d404b871d595794184b8d80fc05682eb6e2792fe Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 10 Apr 2021 16:46:26 -0300
Subject: shader: Mark ImageWrite with side effects

---
 src/shader_recompiler/frontend/ir/microinstruction.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index ceb44e6042..2df631791e 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -93,6 +93,9 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::WriteSharedU32:
     case Opcode::WriteSharedU64:
     case Opcode::WriteSharedU128:
+    case Opcode::BindlessImageWrite:
+    case Opcode::BoundImageWrite:
+    case Opcode::ImageWrite:
         return true;
     default:
         return false;
-- 
cgit v1.2.3-70-g09d2
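
The one-hunk patch above matters for dead-code elimination: image stores produce no SSA
result, so a pass that removes any instruction without uses would happily delete them
unless MayHaveSideEffects() vetoes it. A minimal sketch of that guard, with hypothetical
Block/Inst types standing in for the real IR:

    // Drop use-less instructions, but never ones with external effects.
    void EliminateDeadInstructions(Block& block) {
        for (Inst& inst : block.Instructions()) {
            if (!inst.HasUses() && !inst.MayHaveSideEffects()) {
                inst.Invalidate(); // No observers and no side effects.
            }
        }
    }
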


From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 11 Apr 2021 02:07:02 -0400
Subject: shader: Implement ATOM/S and RED

---
 src/shader_recompiler/CMakeLists.txt               |   3 +
 .../backend/spirv/emit_context.cpp                 | 158 +++++-
 src/shader_recompiler/backend/spirv/emit_context.h |  20 +
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |   3 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  95 ++++
 .../backend/spirv/emit_spirv_atomic.cpp            | 528 +++++++++++++++++++++
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 200 +++++++-
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  39 ++
 .../frontend/ir/microinstruction.cpp               |  66 +++
 src/shader_recompiler/frontend/ir/opcodes.inc      |  70 +++
 .../impl/atomic_operations_global_memory.cpp       | 222 +++++++++
 .../impl/atomic_operations_shared_memory.cpp       | 110 +++++
 .../maxwell/translate/impl/not_implemented.cpp     |  12 -
 .../ir_opt/collect_shader_info_pass.cpp            |  70 +++
 .../global_memory_to_storage_buffer_pass.cpp       | 121 ++++-
 .../ir_opt/lower_fp16_to_fp32.cpp                  |  12 +
 src/shader_recompiler/profile.h                    |   1 +
 src/shader_recompiler/shader_info.h                |  13 +
 .../renderer_vulkan/vk_pipeline_cache.cpp          |   1 +
 src/video_core/vulkan_common/vulkan_device.cpp     |  14 +
 src/video_core/vulkan_common/vulkan_device.h       |   6 +
 21 files changed, 1745 insertions(+), 19 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 8e1d37373d..7b9f08aa03 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_context.h
     backend/spirv/emit_spirv.cpp
     backend/spirv/emit_spirv.h
+    backend/spirv/emit_spirv_atomic.cpp
     backend/spirv/emit_spirv_barriers.cpp
     backend/spirv/emit_spirv_bitwise_conversion.cpp
     backend/spirv/emit_spirv_composite.cpp
@@ -65,6 +66,8 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/program.h
     frontend/maxwell/structured_control_flow.cpp
     frontend/maxwell/structured_control_flow.h
+    frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
+    frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
     frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
     frontend/maxwell/translate/impl/barrier_operations.cpp
     frontend/maxwell/translate/impl/bitfield_extract.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 32f8c4508f..e5d83e9b4b 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -15,6 +15,53 @@
 
 namespace Shader::Backend::SPIRV {
 namespace {
+enum class CasFunctionType {
+    Increment,
+    Decrement,
+    FPAdd,
+    FPMin,
+    FPMax,
+};
+
+Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) {
+    const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)};
+    const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
+    const Id op_a{ctx.OpFunctionParameter(value_type)};
+    const Id op_b{ctx.OpFunctionParameter(value_type)};
+    ctx.AddLabel();
+    Id result{};
+    switch (function_type) {
+    case CasFunctionType::Increment: {
+        const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)};
+        const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))};
+        result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr);
+        break;
+    }
+    case CasFunctionType::Decrement: {
+        const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))};
+        const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)};
+        const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)};
+        const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))};
+        result = ctx.OpSelect(value_type, pred, op_b, decr);
+        break;
+    }
+    case CasFunctionType::FPAdd:
+        result = ctx.OpFAdd(value_type, op_a, op_b);
+        break;
+    case CasFunctionType::FPMin:
+        result = ctx.OpFMin(value_type, op_a, op_b);
+        break;
+    case CasFunctionType::FPMax:
+        result = ctx.OpFMax(value_type, op_a, op_b);
+        break;
+    default:
+        break;
+    }
+    ctx.OpReturnValue(result);
+    ctx.OpFunctionEnd();
+    return func;
+}
+
 Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
     const spv::ImageFormat format{spv::ImageFormat::Unknown};
     const Id type{ctx.F32[1]};
@@ -196,6 +243,56 @@ Id EmitContext::Def(const IR::Value& value) {
     }
 }
 
+Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) {
+    const Id loop_header{OpLabel()};
+    const Id continue_block{OpLabel()};
+    const Id merge_block{OpLabel()};
+    const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type
+                                                                 : storage_memory_u32};
+    const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)};
+    const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
+    const Id index{OpFunctionParameter(U32[1])};
+    const Id op_b{OpFunctionParameter(value_type)};
+    const Id base{OpFunctionParameter(storage_type)};
+    AddLabel();
+    const Id one{Constant(U32[1], 1)};
+    OpBranch(loop_header);
+    AddLabel(loop_header);
+    OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
+    OpBranch(continue_block);
+
+    AddLabel(continue_block);
+    const Id word_pointer{pointer_type == CasPointerType::Shared
+                              ? OpAccessChain(shared_u32, base, index)
+                              : OpAccessChain(storage_u32, base, u32_zero_value, index)};
+    if (value_type.value == F32[2].value) {
+        const Id u32_value{OpLoad(U32[1], word_pointer)};
+        const Id value{OpUnpackHalf2x16(F32[2], u32_value)};
+        const Id new_value{OpFunctionCall(value_type, function, value, op_b)};
+        const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)};
+        const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value,
+                                                    u32_zero_value, u32_new_value, u32_value)};
+        const Id success{OpIEqual(U1, atomic_res, u32_value)};
+        OpBranchConditional(success, merge_block, loop_header);
+
+        AddLabel(merge_block);
+        OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res));
+    } else {
+        const Id value{OpLoad(U32[1], word_pointer)};
+        const Id new_value{OpBitcast(
+            U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))};
+        const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value,
+                                                    u32_zero_value, new_value, value)};
+        const Id success{OpIEqual(U1, atomic_res, value)};
+        OpBranchConditional(success, merge_block, loop_header);
+
+        AddLabel(merge_block);
+        OpReturnValue(OpBitcast(value_type, atomic_res));
+    }
+    OpFunctionEnd();
+    return func;
+}
+
 void EmitContext::DefineCommonTypes(const Info& info) {
     void_id = TypeVoid();
 
@@ -300,9 +397,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
     }
     const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
     const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
-    const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)};
+    shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
     shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
-    shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup);
+    shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
     interfaces.push_back(shared_memory_u32);
 
     const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
@@ -346,6 +443,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
     if (program.info.uses_int16) {
         shared_store_u16_func = make_function(16, 16);
     }
+    if (program.info.uses_shared_increment) {
+        const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])};
+        increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]);
+    }
+    if (program.info.uses_shared_decrement) {
+        const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])};
+        decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]);
+    }
 }
 
 void EmitContext::DefineAttributeMemAccess(const Info& info) {
@@ -530,12 +635,12 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
     MemberName(struct_type, 0, "data");
     MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
 
-    const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
+    storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type);
     storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
 
     u32 index{};
     for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
-        const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
+        const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)};
         Decorate(id, spv::Decoration::Binding, binding);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("ssbo{}", index));
@@ -546,6 +651,51 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
         index += desc.count;
         binding += desc.count;
     }
+    if (info.uses_global_increment) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])};
+        increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]);
+    }
+    if (info.uses_global_decrement) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])};
+        decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]);
+    }
+    if (info.uses_atomic_f32_add) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])};
+        f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]);
+    }
+    if (info.uses_atomic_f16x2_add) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])};
+        f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]);
+    }
+    if (info.uses_atomic_f16x2_min) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])};
+        f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]);
+    }
+    if (info.uses_atomic_f16x2_max) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])};
+        f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]);
+    }
+    if (info.uses_atomic_f32x2_add) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])};
+        f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]);
+    }
+    if (info.uses_atomic_f32x2_min) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])};
+        f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]);
+    }
+    if (info.uses_atomic_f32x2_max) {
+        AddCapability(spv::Capability::VariablePointersStorageBuffer);
+        const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])};
+        f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]);
+    }
 }
 
 void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
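
The CasLoop helper added above emulates atomics SPIR-V has no native opcode for (float
add/min/max, packed f16x2/f32x2 operations, increment and decrement) by spinning on
OpAtomicCompareExchange: load the 32-bit word, run the plugged-in combiner, and retry
until the compare-exchange observes the same word it started from. The same shape in
ordinary C++, purely as an illustration (std::atomic stands in for the storage pointer):

    #include <atomic>
    #include <cstdint>
    #include <cstring>

    // CAS-loop float add over a 32-bit word, mirroring the emitted structure.
    float AtomicAddF32(std::atomic<std::uint32_t>& word, float operand) {
        std::uint32_t expected = word.load();
        for (;;) {
            float value;
            std::memcpy(&value, &expected, sizeof value);   // OpBitcast
            const float new_value = value + operand;        // CasFunction body
            std::uint32_t desired;
            std::memcpy(&desired, &new_value, sizeof desired);
            if (word.compare_exchange_weak(expected, desired)) {
                return value; // The pre-operation value, as the IR expects.
            }
            // On failure, expected holds the current word; loop and retry.
        }
    }
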
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index e70f3458c5..34f38454fa 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -94,6 +94,7 @@ public:
     Id output_f32{};
 
     Id storage_u32{};
+    Id storage_memory_u32{};
 
     Id image_buffer_type{};
     Id sampled_texture_buffer_type{};
@@ -136,9 +137,21 @@ public:
     Id shared_memory_u32{};
     Id shared_memory_u32x2{};
     Id shared_memory_u32x4{};
+    Id shared_memory_u32_type{};
 
     Id shared_store_u8_func{};
     Id shared_store_u16_func{};
+    Id increment_cas_shared{};
+    Id increment_cas_ssbo{};
+    Id decrement_cas_shared{};
+    Id decrement_cas_ssbo{};
+    Id f32_add_cas{};
+    Id f16x2_add_cas{};
+    Id f16x2_min_cas{};
+    Id f16x2_max_cas{};
+    Id f32x2_add_cas{};
+    Id f32x2_min_cas{};
+    Id f32x2_max_cas{};
 
     Id input_position{};
     std::array<Id, 32> input_generics{};
@@ -153,6 +166,11 @@ public:
     std::vector<Id> interfaces;
 
 private:
+    enum class CasPointerType {
+        Shared,
+        Ssbo,
+    };
+
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
     void DefineInterfaces(const Info& info);
@@ -171,6 +189,8 @@ private:
 
     void DefineInputs(const Info& info);
     void DefineOutputs(const Info& info);
+
+    [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type);
 };
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 5a1ffd61cb..9248bd78ba 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -238,6 +238,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
             ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
         }
     }
+    if (info.uses_64_bit_atomics && profile.support_int64_atomics) {
+        ctx.AddCapability(spv::Capability::Int64Atomics);
+    }
     if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
         ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 12b7993aef..a3398a6054 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -306,6 +306,101 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value);
+Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                               Id value);
+Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value);
+Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                               Id value);
+Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
+Id EmitGlobalAtomicSMin32(EmitContext& ctx);
+Id EmitGlobalAtomicUMin32(EmitContext& ctx);
+Id EmitGlobalAtomicSMax32(EmitContext& ctx);
+Id EmitGlobalAtomicUMax32(EmitContext& ctx);
+Id EmitGlobalAtomicInc32(EmitContext& ctx);
+Id EmitGlobalAtomicDec32(EmitContext& ctx);
+Id EmitGlobalAtomicAnd32(EmitContext& ctx);
+Id EmitGlobalAtomicOr32(EmitContext& ctx);
+Id EmitGlobalAtomicXor32(EmitContext& ctx);
+Id EmitGlobalAtomicExchange32(EmitContext& ctx);
+Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
+Id EmitGlobalAtomicSMin64(EmitContext& ctx);
+Id EmitGlobalAtomicUMin64(EmitContext& ctx);
+Id EmitGlobalAtomicSMax64(EmitContext& ctx);
+Id EmitGlobalAtomicUMax64(EmitContext& ctx);
+Id EmitGlobalAtomicInc64(EmitContext& ctx);
+Id EmitGlobalAtomicDec64(EmitContext& ctx);
+Id EmitGlobalAtomicAnd64(EmitContext& ctx);
+Id EmitGlobalAtomicOr64(EmitContext& ctx);
+Id EmitGlobalAtomicXor64(EmitContext& ctx);
+Id EmitGlobalAtomicExchange64(EmitContext& ctx);
+Id EmitGlobalAtomicAddF32(EmitContext& ctx);
+Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
 Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
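
A note on the new file below: every 64-bit storage atomic checks
profile.support_int64_atomics and, when the capability is missing, degrades to a
non-atomic read-modify-write over two adjacent 32-bit words (LoadU64/StoreResult). A
simplified C++ rendering of that fallback, not the emitter code itself:

    #include <cstdint>

    // Non-atomic 64-bit exchange built from two 32-bit words, mirroring the
    // LoadU64/StoreResult path taken when Int64Atomics is unavailable.
    std::uint64_t ExchangeU64Fallback(std::uint32_t& lo, std::uint32_t& hi,
                                      std::uint64_t value) {
        const std::uint64_t original = (static_cast<std::uint64_t>(hi) << 32) | lo;
        lo = static_cast<std::uint32_t>(value);
        hi = static_cast<std::uint32_t>(value >> 32);
        return original;
    }
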
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
new file mode 100644
index 0000000000..03d8914199
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -0,0 +1,528 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+
+Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
+    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+    const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))};
+    return ctx.profile.support_explicit_workgroup_layout
+               ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)
+               : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
+}
+
+Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) {
+    if (offset.IsImmediate()) {
+        const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)};
+        return ctx.Constant(ctx.U32[1], imm_offset);
+    }
+    const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
+    const Id index{ctx.Def(offset)};
+    if (shift == 0) {
+        return index;
+    }
+    const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
+    return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
+}
+
+Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                     u32 index_offset = 0) {
+    // TODO: Support reinterpreting bindings, guaranteed to be aligned
+    if (!binding.IsImmediate()) {
+        throw NotImplementedException("Dynamic storage buffer indexing");
+    }
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))};
+    return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index);
+}
+
+std::pair<Id, Id> GetAtomicArgs(EmitContext& ctx) {
+    const Id scope{ctx.Constant(ctx.U32[1], static_cast<u32>(spv::Scope::Device))};
+    const Id semantics{ctx.u32_zero_value};
+    return {scope, semantics};
+}
+
+Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) {
+    const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
+    const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
+    const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)};
+    return ctx.OpBitcast(ctx.U64, original_composite);
+}
+
+void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) {
+    const Id composite{ctx.OpBitcast(ctx.U32[2], result)};
+    ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0));
+    ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1));
+}
+} // Anonymous namespace
+
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)};
+    return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value,
+                              ctx.shared_memory_u32);
+}
+
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)};
+    return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value,
+                              ctx.shared_memory_u32);
+}
+
+Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) {
+    const Id pointer_1{GetSharedPointer(ctx, pointer_offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    StoreResult(ctx, pointer_1, pointer_2, value);
+    return original_value;
+}
+
+Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                               Id value) {
+    const Id pointer{GetStoragePointer(ctx, binding, offset)};
+    const auto [scope, semantics]{GetAtomicArgs(ctx)};
+    return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    const Id result{ctx.OpIAdd(ctx.U64, value, original_value)};
+    StoreResult(ctx, pointer_1, pointer_2, result);
+    return original_value;
+}
+
+Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    const Id result{ctx.OpSMin(ctx.U64, value, original_value)};
+    StoreResult(ctx, pointer_1, pointer_2, result);
+    return original_value;
+}
+
+Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    const Id result{ctx.OpUMin(ctx.U64, value, original_value)};
+    StoreResult(ctx, pointer_1, pointer_2, result);
+    return original_value;
+}
+
+Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    const Id result{ctx.OpSMax(ctx.U64, value, original_value)};
+    StoreResult(ctx, pointer_1, pointer_2, result);
+    return original_value;
+}
+
+Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    const Id result{ctx.OpUMax(ctx.U64, value, original_value)};
+    StoreResult(ctx, pointer_1, pointer_2, result);
+    return original_value;
+}
+
+Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)};
+    StoreResult(ctx, pointer_1, pointer_2, result);
+    return original_value;
+}
+
+Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)};
+    StoreResult(ctx, pointer_1, pointer_2, result);
+    return original_value;
+}
+
+Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)};
+    StoreResult(ctx, pointer_1, pointer_2, result);
+    return original_value;
+}
+
+Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                               Id value) {
+    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
+    if (ctx.profile.support_int64_atomics) {
+        const auto [scope, semantics]{GetAtomicArgs(ctx)};
+        return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value);
+    }
+    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
+    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
+    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
+    StoreResult(ctx, pointer_1, pointer_2, value);
+    return original_value;
+}
+
+Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
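+    // The add is emulated through the f32_add_cas helper, presumably a compare-and-swap loop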
+    return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)};
+    return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)};
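+    // Pack the two 32-bit results back into a pair of 16-bit halves, matching the U32 result type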
+    return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)};
+    return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)};
+    return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)};
+    return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    const Id ssbo{ctx.ssbos[binding.U32()]};
+    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+    const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)};
+    return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
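+// Global atomics are expected to be lowered to storage atomics by the global memory pass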
+Id EmitGlobalAtomicIAdd32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMin32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMin32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMax32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMax32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicInc32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicDec32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAnd32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicOr32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicXor32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicExchange32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicIAdd64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMin64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMin64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMax64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMax64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicInc64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicDec64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAnd64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicOr64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicXor64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicExchange64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF16x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMinF16x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMinF32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMaxF16x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMaxF32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 17be0c639e..a3339f624a 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed)
     return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
 }
 
+U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
+    return is_signed ? SharedAtomicSMin(pointer_offset, value)
+                     : SharedAtomicUMin(pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
+    return is_signed ? SharedAtomicSMax(pointer_offset, value)
+                     : SharedAtomicUMax(pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value);
+}
+
+U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
+    return is_signed ? GlobalAtomicSMin(pointer_offset, value)
+                     : GlobalAtomicUMin(pointer_offset, value);
+}
+
+U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
+    return is_signed ? GlobalAtomicSMax(pointer_offset, value)
+                     : GlobalAtomicUMax(pointer_offset, value);
+}
+
+U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value);
+}
+
+U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value);
+}
+
+U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
+                                  const FpControl control) {
+    return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
+                                      const FpControl control) {
+    return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
+                                      const FpControl control) {
+    return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
+                                      const FpControl control) {
+    return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value);
+}
+
 U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
     return Inst<U1>(Opcode::LogicalOr, a, b);
 }
@@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst
 }
 
 void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
-                            TextureInstInfo info) {
+                           TextureInstInfo info) {
     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
     Inst(op, Flags{info}, handle, coords, color);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index ec60070efe..f9cbf1304c 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -228,6 +228,45 @@ public:
     [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
     [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
 
+    [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
+    [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
+    [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
+
+    [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
+                                          bool is_signed);
+    [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
+                                          bool is_signed);
+    [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
+    [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
+    [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
+
+    [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
+                                         const FpControl control = {});
+    [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
+                                             const FpControl control = {});
+    [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
+                                             const FpControl control = {});
+    [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
+                                             const FpControl control = {});
+
     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 2df631791e..0f66c56270 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::WriteSharedU32:
     case Opcode::WriteSharedU64:
     case Opcode::WriteSharedU128:
+    case Opcode::SharedAtomicIAdd32:
+    case Opcode::SharedAtomicSMin32:
+    case Opcode::SharedAtomicUMin32:
+    case Opcode::SharedAtomicSMax32:
+    case Opcode::SharedAtomicUMax32:
+    case Opcode::SharedAtomicInc32:
+    case Opcode::SharedAtomicDec32:
+    case Opcode::SharedAtomicAnd32:
+    case Opcode::SharedAtomicOr32:
+    case Opcode::SharedAtomicXor32:
+    case Opcode::SharedAtomicExchange32:
+    case Opcode::SharedAtomicExchange64:
+    case Opcode::GlobalAtomicIAdd32:
+    case Opcode::GlobalAtomicSMin32:
+    case Opcode::GlobalAtomicUMin32:
+    case Opcode::GlobalAtomicSMax32:
+    case Opcode::GlobalAtomicUMax32:
+    case Opcode::GlobalAtomicInc32:
+    case Opcode::GlobalAtomicDec32:
+    case Opcode::GlobalAtomicAnd32:
+    case Opcode::GlobalAtomicOr32:
+    case Opcode::GlobalAtomicXor32:
+    case Opcode::GlobalAtomicExchange32:
+    case Opcode::GlobalAtomicIAdd64:
+    case Opcode::GlobalAtomicSMin64:
+    case Opcode::GlobalAtomicUMin64:
+    case Opcode::GlobalAtomicSMax64:
+    case Opcode::GlobalAtomicUMax64:
+    case Opcode::GlobalAtomicAnd64:
+    case Opcode::GlobalAtomicOr64:
+    case Opcode::GlobalAtomicXor64:
+    case Opcode::GlobalAtomicExchange64:
+    case Opcode::GlobalAtomicAddF32:
+    case Opcode::GlobalAtomicAddF16x2:
+    case Opcode::GlobalAtomicAddF32x2:
+    case Opcode::GlobalAtomicMinF16x2:
+    case Opcode::GlobalAtomicMinF32x2:
+    case Opcode::GlobalAtomicMaxF16x2:
+    case Opcode::GlobalAtomicMaxF32x2:
+    case Opcode::StorageAtomicIAdd32:
+    case Opcode::StorageAtomicSMin32:
+    case Opcode::StorageAtomicUMin32:
+    case Opcode::StorageAtomicSMax32:
+    case Opcode::StorageAtomicUMax32:
+    case Opcode::StorageAtomicInc32:
+    case Opcode::StorageAtomicDec32:
+    case Opcode::StorageAtomicAnd32:
+    case Opcode::StorageAtomicOr32:
+    case Opcode::StorageAtomicXor32:
+    case Opcode::StorageAtomicExchange32:
+    case Opcode::StorageAtomicIAdd64:
+    case Opcode::StorageAtomicSMin64:
+    case Opcode::StorageAtomicUMin64:
+    case Opcode::StorageAtomicSMax64:
+    case Opcode::StorageAtomicUMax64:
+    case Opcode::StorageAtomicAnd64:
+    case Opcode::StorageAtomicOr64:
+    case Opcode::StorageAtomicXor64:
+    case Opcode::StorageAtomicExchange64:
+    case Opcode::StorageAtomicAddF32:
+    case Opcode::StorageAtomicAddF16x2:
+    case Opcode::StorageAtomicAddF32x2:
+    case Opcode::StorageAtomicMinF16x2:
+    case Opcode::StorageAtomicMinF32x2:
+    case Opcode::StorageAtomicMaxF16x2:
+    case Opcode::StorageAtomicMaxF32x2:
     case Opcode::BindlessImageWrite:
     case Opcode::BoundImageWrite:
     case Opcode::ImageWrite:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 86ea025605..dc776a73e6 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -321,6 +321,76 @@ OPCODE(INotEqual,                                           U1,             U32,
 OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                                            )
 OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                                            )
 
+// Atomic operations
+OPCODE(SharedAtomicIAdd32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicSMin32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicUMin32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicSMax32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicUMax32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicInc32,                                   U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicDec32,                                   U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicAnd32,                                   U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicOr32,                                    U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicXor32,                                   U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicExchange32,                              U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicExchange64,                              U64,            U32,            U64,                                                            )
+
+OPCODE(GlobalAtomicIAdd32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicSMin32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicUMin32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicSMax32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicUMax32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicInc32,                                   U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicDec32,                                   U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicAnd32,                                   U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicOr32,                                    U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicXor32,                                   U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicExchange32,                              U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicIAdd64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicSMin64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicUMin64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicSMax64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicUMax64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicAnd64,                                   U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicOr64,                                    U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicXor64,                                   U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicExchange64,                              U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicAddF32,                                  F32,            U64,            F32,                                                            )
+OPCODE(GlobalAtomicAddF16x2,                                U32,            U64,            F16x2,                                                          )
+OPCODE(GlobalAtomicAddF32x2,                                U32,            U64,            F32x2,                                                          )
+OPCODE(GlobalAtomicMinF16x2,                                U32,            U64,            F16x2,                                                          )
+OPCODE(GlobalAtomicMinF32x2,                                U32,            U64,            F32x2,                                                          )
+OPCODE(GlobalAtomicMaxF16x2,                                U32,            U64,            F16x2,                                                          )
+OPCODE(GlobalAtomicMaxF32x2,                                U32,            U64,            F32x2,                                                          )
+
+OPCODE(StorageAtomicIAdd32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicSMin32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicUMin32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicSMax32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicUMax32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicInc32,                                  U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicDec32,                                  U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicAnd32,                                  U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicOr32,                                   U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicXor32,                                  U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicExchange32,                             U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicIAdd64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicSMin64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicUMin64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicSMax64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicUMax64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicAnd64,                                  U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicOr64,                                   U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicXor64,                                  U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicExchange64,                             U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicAddF32,                                 F32,            U32,            U32,            F32,                                            )
+OPCODE(StorageAtomicAddF16x2,                               U32,            U32,            U32,            F16x2,                                          )
+OPCODE(StorageAtomicAddF32x2,                               U32,            U32,            U32,            F32x2,                                          )
+OPCODE(StorageAtomicMinF16x2,                               U32,            U32,            U32,            F16x2,                                          )
+OPCODE(StorageAtomicMinF32x2,                               U32,            U32,            U32,            F32x2,                                          )
+OPCODE(StorageAtomicMaxF16x2,                               U32,            U32,            U32,            F16x2,                                          )
+OPCODE(StorageAtomicMaxF32x2,                               U32,            U32,            U32,            F32x2,                                          )
+
 // Logical operations
 OPCODE(LogicalOr,                                           U1,             U1,             U1,                                                             )
 OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                                             )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 0000000000..7a32c5eb33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,226 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+    ADD,
+    MIN,
+    MAX,
+    INC,
+    DEC,
+    AND,
+    OR,
+    XOR,
+    EXCH,
+    SAFEADD,
+};
+
+enum class AtomSize : u64 {
+    U32,
+    S32,
+    U64,
+    F32,
+    F16x2,
+    S64,
+};
+
+IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
+                              AtomOp op, bool is_signed) {
+    switch (op) {
+    case AtomOp::ADD:
+        return ir.GlobalAtomicIAdd(offset, op_b);
+    case AtomOp::MIN:
+        return ir.GlobalAtomicIMin(offset, op_b, is_signed);
+    case AtomOp::MAX:
+        return ir.GlobalAtomicIMax(offset, op_b, is_signed);
+    case AtomOp::INC:
+        return ir.GlobalAtomicInc(offset, op_b);
+    case AtomOp::DEC:
+        return ir.GlobalAtomicDec(offset, op_b);
+    case AtomOp::AND:
+        return ir.GlobalAtomicAnd(offset, op_b);
+    case AtomOp::OR:
+        return ir.GlobalAtomicOr(offset, op_b);
+    case AtomOp::XOR:
+        return ir.GlobalAtomicXor(offset, op_b);
+    case AtomOp::EXCH:
+        return ir.GlobalAtomicExchange(offset, op_b);
+    default:
+        throw NotImplementedException("Integer Atom Operation {}", op);
+    }
+}
+
+IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
+                        AtomSize size) {
+    static constexpr IR::FpControl f16_control{
+        .no_contraction{false},
+        .rounding{IR::FpRounding::RN},
+        .fmz_mode{IR::FmzMode::DontCare},
+    };
+    static constexpr IR::FpControl f32_control{
+        .no_contraction{false},
+        .rounding{IR::FpRounding::RN},
+        .fmz_mode{IR::FmzMode::FTZ},
+    };
+    switch (op) {
+    case AtomOp::ADD:
+        return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
+                                     : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
+    case AtomOp::MIN:
+        return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
+    case AtomOp::MAX:
+        return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
+    default:
+        throw NotImplementedException("FP Atom Operation {}", op);
+    }
+}
+
+IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<28, 20, s64> addr_offset;
+        BitField<28, 20, u64> rz_addr_offset;
+        BitField<48, 1, u64> e;
+    } const mem{insn};
+
+    const IR::U64 address{[&]() -> IR::U64 {
+        if (mem.e == 0) {
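+            // Without the E bit, the 32-bit register is zero-extended to a 64-bit address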
+            return v.ir.UConvert(64, v.X(mem.addr_reg));
+        }
+        return v.L(mem.addr_reg);
+    }()};
+    const u64 addr_offset{[&]() -> u64 {
+        if (mem.addr_reg == IR::Reg::RZ) {
+            // When RZ is used, the immediate offset is an absolute address
+            return static_cast<u64>(mem.rz_addr_offset.Value());
+        } else {
+            return static_cast<u64>(mem.addr_offset.Value());
+        }
+    }()};
+    return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+
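+// Op/size combinations that are not applicable; ATOM degrades them to a plain load, RED to a no-op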
+bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
+    // TODO: SAFEADD
+    switch (size) {
+    case AtomSize::S32:
+    case AtomSize::U64:
+        return (op == AtomOp::INC || op == AtomOp::DEC);
+    case AtomSize::S64:
+        return !(op == AtomOp::MIN || op == AtomOp::MAX);
+    case AtomSize::F32:
+        return op != AtomOp::ADD;
+    case AtomSize::F16x2:
+        return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
+    default:
+        return false;
+    }
+}
+
+IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
+    switch (size) {
+    case AtomSize::U32:
+    case AtomSize::S32:
+    case AtomSize::F32:
+    case AtomSize::F16x2:
+        return ir.LoadGlobal32(offset);
+    case AtomSize::U64:
+    case AtomSize::S64:
+        return ir.PackUint2x32(ir.LoadGlobal64(offset));
+    default:
+        throw NotImplementedException("Atom Size {}", size);
+    }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
+    switch (size) {
+    case AtomSize::U32:
+    case AtomSize::S32:
+    case AtomSize::F16x2:
+        return v.X(dest_reg, IR::U32{result});
+    case AtomSize::U64:
+    case AtomSize::S64:
+        return v.L(dest_reg, IR::U64{result});
+    case AtomSize::F32:
+        return v.F(dest_reg, IR::F32{result});
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
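+// ATOM: atomic operation on global memory; the destination register receives the previous value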
+void TranslatorVisitor::ATOM(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 8, IR::Reg> src_reg_b;
+        BitField<49, 3, AtomSize> size;
+        BitField<52, 4, AtomOp> op;
+    } const atom{insn};
+
+    const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64};
+    const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64};
+    const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2};
+    const IR::U64 offset{AtomOffset(*this, insn)};
+    IR::Value result;
+
+    if (AtomOpNotApplicable(atom.size, atom.op)) {
+        result = LoadGlobal(ir, offset, atom.size);
+    } else if (!is_integer) {
+        if (atom.size == AtomSize::F32) {
+            result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size);
+        } else {
+            const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))};
+            result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size);
+        }
+    } else if (size_64) {
+        result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed);
+    } else {
+        result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed);
+    }
+    StoreResult(*this, atom.dest_reg, result, atom.size);
+}
+
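+// RED: reduction on global memory; the previous value is discarded, so no register is written back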
+void TranslatorVisitor::RED(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> src_reg_b;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 3, AtomSize> size;
+        BitField<23, 3, AtomOp> op;
+    } const red{insn};
+
+    if (AtomOpNotApplicable(red.size, red.op)) {
+        return;
+    }
+    const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64};
+    const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64};
+    const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2};
+    const IR::U64 offset{AtomOffset(*this, insn)};
+    if (!is_integer) {
+        if (red.size == AtomSize::F32) {
+            ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size);
+        } else {
+            const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))};
+            ApplyFpAtomOp(ir, offset, src_b, red.op, red.size);
+        }
+    } else if (size_64) {
+        ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed);
+    } else {
+        ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed);
+    }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 0000000000..8b974621e9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,111 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+    ADD,
+    MIN,
+    MAX,
+    INC,
+    DEC,
+    AND,
+    OR,
+    XOR,
+    EXCH,
+};
+
+enum class AtomsSize : u64 {
+    U32,
+    S32,
+    U64,
+};
+
+IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
+                        bool is_signed) {
+    switch (op) {
+    case AtomOp::ADD:
+        return ir.SharedAtomicIAdd(offset, op_b);
+    case AtomOp::MIN:
+        return ir.SharedAtomicIMin(offset, op_b, is_signed);
+    case AtomOp::MAX:
+        return ir.SharedAtomicIMax(offset, op_b, is_signed);
+    case AtomOp::INC:
+        return ir.SharedAtomicInc(offset, op_b);
+    case AtomOp::DEC:
+        return ir.SharedAtomicDec(offset, op_b);
+    case AtomOp::AND:
+        return ir.SharedAtomicAnd(offset, op_b);
+    case AtomOp::OR:
+        return ir.SharedAtomicOr(offset, op_b);
+    case AtomOp::XOR:
+        return ir.SharedAtomicXor(offset, op_b);
+    case AtomOp::EXCH:
+        return ir.SharedAtomicExchange(offset, op_b);
+    default:
+        throw NotImplementedException("Integer Atoms Operation {}", op);
+    }
+}
+
+IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> offset_reg;
+        BitField<30, 22, u64> absolute_offset;
+        BitField<30, 22, s64> relative_offset;
+    } const encoding{insn};
+
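+    // The immediate offset appears to be encoded in 4-byte units; shift it into a byte offset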
+    if (encoding.offset_reg == IR::Reg::RZ) {
+        return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
+    } else {
+        const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
+        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+    }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
+    switch (size) {
+    case AtomsSize::U32:
+    case AtomsSize::S32:
+        return v.X(dest_reg, IR::U32{result});
+    case AtomsSize::U64:
+        return v.L(dest_reg, IR::U64{result});
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOMS(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 8, IR::Reg> src_reg_b;
+        BitField<28, 2, AtomsSize> size;
+        BitField<52, 4, AtomOp> op;
+    } const atoms{insn};
+
+    const bool size_64{atoms.size == AtomsSize::U64};
+    if (size_64 && atoms.op != AtomOp::EXCH) {
+        throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
+    }
+    const bool is_signed{atoms.size == AtomsSize::S32};
+    const IR::U32 offset{AtomsOffset(*this, insn)};
+
+    IR::Value result;
+    if (size_64) {
+        result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
+    } else {
+        result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
+    }
+    StoreResult(*this, atoms.dest_reg, result, atoms.size);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 3279412234..aebe3072a5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) {
     ThrowNotImplemented(Opcode::ATOM_cas);
 }
 
-void TranslatorVisitor::ATOM(u64) {
-    ThrowNotImplemented(Opcode::ATOM);
-}
-
 void TranslatorVisitor::ATOMS_cas(u64) {
     ThrowNotImplemented(Opcode::ATOMS_cas);
 }
 
-void TranslatorVisitor::ATOMS(u64) {
-    ThrowNotImplemented(Opcode::ATOMS);
-}
-
 void TranslatorVisitor::B2R(u64) {
     ThrowNotImplemented(Opcode::B2R);
 }
@@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) {
     ThrowNotImplemented(Opcode::RAM);
 }
 
-void TranslatorVisitor::RED(u64) {
-    ThrowNotImplemented(Opcode::RED);
-}
-
 void TranslatorVisitor::RET(u64) {
     ThrowNotImplemented(Opcode::RET);
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 9ef8688c98..73373576b1 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPOrdGreaterThanEqual16:
     case IR::Opcode::FPUnordGreaterThanEqual16:
     case IR::Opcode::FPIsNan16:
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::StorageAtomicAddF16x2:
+    case IR::Opcode::StorageAtomicMinF16x2:
+    case IR::Opcode::StorageAtomicMaxF16x2:
         info.uses_fp16 = true;
         break;
     case IR::Opcode::CompositeConstructF64x2:
@@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::ConvertF16U64:
     case IR::Opcode::ConvertF32U64:
     case IR::Opcode::ConvertF64U64:
+    case IR::Opcode::SharedAtomicExchange64:
         info.uses_int64 = true;
         break;
     default:
@@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FSwizzleAdd:
         info.uses_fswzadd = true;
         break;
+    case IR::Opcode::SharedAtomicInc32:
+        info.uses_shared_increment = true;
+        break;
+    case IR::Opcode::SharedAtomicDec32:
+        info.uses_shared_decrement = true;
+        break;
+    case IR::Opcode::GlobalAtomicInc32:
+    case IR::Opcode::StorageAtomicInc32:
+        info.uses_global_increment = true;
+        break;
+    case IR::Opcode::GlobalAtomicDec32:
+    case IR::Opcode::StorageAtomicDec32:
+        info.uses_global_decrement = true;
+        break;
+    case IR::Opcode::GlobalAtomicAddF32:
+    case IR::Opcode::StorageAtomicAddF32:
+        info.uses_atomic_f32_add = true;
+        break;
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::StorageAtomicAddF16x2:
+        info.uses_atomic_f16x2_add = true;
+        break;
+    case IR::Opcode::GlobalAtomicAddF32x2:
+    case IR::Opcode::StorageAtomicAddF32x2:
+        info.uses_atomic_f32x2_add = true;
+        break;
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::StorageAtomicMinF16x2:
+        info.uses_atomic_f16x2_min = true;
+        break;
+    case IR::Opcode::GlobalAtomicMinF32x2:
+    case IR::Opcode::StorageAtomicMinF32x2:
+        info.uses_atomic_f32x2_min = true;
+        break;
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+    case IR::Opcode::StorageAtomicMaxF16x2:
+        info.uses_atomic_f16x2_max = true;
+        break;
+    case IR::Opcode::GlobalAtomicMaxF32x2:
+    case IR::Opcode::StorageAtomicMaxF32x2:
+        info.uses_atomic_f32x2_max = true;
+        break;
+    case IR::Opcode::GlobalAtomicIAdd64:
+    case IR::Opcode::GlobalAtomicSMin64:
+    case IR::Opcode::GlobalAtomicUMin64:
+    case IR::Opcode::GlobalAtomicSMax64:
+    case IR::Opcode::GlobalAtomicUMax64:
+    case IR::Opcode::GlobalAtomicAnd64:
+    case IR::Opcode::GlobalAtomicOr64:
+    case IR::Opcode::GlobalAtomicXor64:
+    case IR::Opcode::GlobalAtomicExchange64:
+    case IR::Opcode::StorageAtomicIAdd64:
+    case IR::Opcode::StorageAtomicSMin64:
+    case IR::Opcode::StorageAtomicUMin64:
+    case IR::Opcode::StorageAtomicSMax64:
+    case IR::Opcode::StorageAtomicUMax64:
+    case IR::Opcode::StorageAtomicAnd64:
+    case IR::Opcode::StorageAtomicOr64:
+    case IR::Opcode::StorageAtomicXor64:
+        info.uses_64_bit_atomics = true;
+        break;
+    case IR::Opcode::SharedAtomicExchange64:
+        info.uses_64_bit_atomics = true;
+        info.uses_shared_memory_u32x2 = true;
+        break;
     default:
         break;
     }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index afe871505e..0d4f266c39 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) {
     case IR::Opcode::WriteGlobal32:
     case IR::Opcode::WriteGlobal64:
     case IR::Opcode::WriteGlobal128:
+    case IR::Opcode::GlobalAtomicIAdd32:
+    case IR::Opcode::GlobalAtomicSMin32:
+    case IR::Opcode::GlobalAtomicUMin32:
+    case IR::Opcode::GlobalAtomicSMax32:
+    case IR::Opcode::GlobalAtomicUMax32:
+    case IR::Opcode::GlobalAtomicInc32:
+    case IR::Opcode::GlobalAtomicDec32:
+    case IR::Opcode::GlobalAtomicAnd32:
+    case IR::Opcode::GlobalAtomicOr32:
+    case IR::Opcode::GlobalAtomicXor32:
+    case IR::Opcode::GlobalAtomicExchange32:
+    case IR::Opcode::GlobalAtomicIAdd64:
+    case IR::Opcode::GlobalAtomicSMin64:
+    case IR::Opcode::GlobalAtomicUMin64:
+    case IR::Opcode::GlobalAtomicSMax64:
+    case IR::Opcode::GlobalAtomicUMax64:
+    case IR::Opcode::GlobalAtomicAnd64:
+    case IR::Opcode::GlobalAtomicOr64:
+    case IR::Opcode::GlobalAtomicXor64:
+    case IR::Opcode::GlobalAtomicExchange64:
+    case IR::Opcode::GlobalAtomicAddF32:
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::GlobalAtomicAddF32x2:
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::GlobalAtomicMinF32x2:
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+    case IR::Opcode::GlobalAtomicMaxF32x2:
         return true;
     default:
         return false;
@@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
         return IR::Opcode::WriteStorage64;
     case IR::Opcode::WriteGlobal128:
         return IR::Opcode::WriteStorage128;
+    case IR::Opcode::GlobalAtomicIAdd32:
+        return IR::Opcode::StorageAtomicIAdd32;
+    case IR::Opcode::GlobalAtomicSMin32:
+        return IR::Opcode::StorageAtomicSMin32;
+    case IR::Opcode::GlobalAtomicUMin32:
+        return IR::Opcode::StorageAtomicUMin32;
+    case IR::Opcode::GlobalAtomicSMax32:
+        return IR::Opcode::StorageAtomicSMax32;
+    case IR::Opcode::GlobalAtomicUMax32:
+        return IR::Opcode::StorageAtomicUMax32;
+    case IR::Opcode::GlobalAtomicInc32:
+        return IR::Opcode::StorageAtomicInc32;
+    case IR::Opcode::GlobalAtomicDec32:
+        return IR::Opcode::StorageAtomicDec32;
+    case IR::Opcode::GlobalAtomicAnd32:
+        return IR::Opcode::StorageAtomicAnd32;
+    case IR::Opcode::GlobalAtomicOr32:
+        return IR::Opcode::StorageAtomicOr32;
+    case IR::Opcode::GlobalAtomicXor32:
+        return IR::Opcode::StorageAtomicXor32;
+    case IR::Opcode::GlobalAtomicIAdd64:
+        return IR::Opcode::StorageAtomicIAdd64;
+    case IR::Opcode::GlobalAtomicSMin64:
+        return IR::Opcode::StorageAtomicSMin64;
+    case IR::Opcode::GlobalAtomicUMin64:
+        return IR::Opcode::StorageAtomicUMin64;
+    case IR::Opcode::GlobalAtomicSMax64:
+        return IR::Opcode::StorageAtomicSMax64;
+    case IR::Opcode::GlobalAtomicUMax64:
+        return IR::Opcode::StorageAtomicUMax64;
+    case IR::Opcode::GlobalAtomicAnd64:
+        return IR::Opcode::StorageAtomicAnd64;
+    case IR::Opcode::GlobalAtomicOr64:
+        return IR::Opcode::StorageAtomicOr64;
+    case IR::Opcode::GlobalAtomicXor64:
+        return IR::Opcode::StorageAtomicXor64;
+    case IR::Opcode::GlobalAtomicExchange32:
+        return IR::Opcode::StorageAtomicExchange32;
+    case IR::Opcode::GlobalAtomicExchange64:
+        return IR::Opcode::StorageAtomicExchange64;
+    case IR::Opcode::GlobalAtomicAddF32:
+        return IR::Opcode::StorageAtomicAddF32;
+    case IR::Opcode::GlobalAtomicAddF16x2:
+        return IR::Opcode::StorageAtomicAddF16x2;
+    case IR::Opcode::GlobalAtomicMinF16x2:
+        return IR::Opcode::StorageAtomicMinF16x2;
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+        return IR::Opcode::StorageAtomicMaxF16x2;
+    case IR::Opcode::GlobalAtomicAddF32x2:
+        return IR::Opcode::StorageAtomicAddF32x2;
+    case IR::Opcode::GlobalAtomicMinF32x2:
+        return IR::Opcode::StorageAtomicMinF32x2;
+    case IR::Opcode::GlobalAtomicMaxF32x2:
+        return IR::Opcode::StorageAtomicMaxF32x2;
     default:
         throw InvalidArgument("Invalid global memory opcode {}", opcode);
     }
@@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
     inst.Invalidate();
 }
 
+/// Replace a global memory atomic instruction with its storage buffer equivalent
+void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+                   const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
+    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+    const IR::Value value{
+        &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
+    inst.ReplaceUsesWith(value);
+}
+
 /// Replace a global memory instruction with its storage buffer equivalent
 void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
              const IR::U32& offset) {
@@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
     case IR::Opcode::WriteGlobal64:
     case IR::Opcode::WriteGlobal128:
         return ReplaceWrite(block, inst, storage_index, offset);
+    case IR::Opcode::GlobalAtomicIAdd32:
+    case IR::Opcode::GlobalAtomicSMin32:
+    case IR::Opcode::GlobalAtomicUMin32:
+    case IR::Opcode::GlobalAtomicSMax32:
+    case IR::Opcode::GlobalAtomicUMax32:
+    case IR::Opcode::GlobalAtomicInc32:
+    case IR::Opcode::GlobalAtomicDec32:
+    case IR::Opcode::GlobalAtomicAnd32:
+    case IR::Opcode::GlobalAtomicOr32:
+    case IR::Opcode::GlobalAtomicXor32:
+    case IR::Opcode::GlobalAtomicExchange32:
+    case IR::Opcode::GlobalAtomicIAdd64:
+    case IR::Opcode::GlobalAtomicSMin64:
+    case IR::Opcode::GlobalAtomicUMin64:
+    case IR::Opcode::GlobalAtomicSMax64:
+    case IR::Opcode::GlobalAtomicUMax64:
+    case IR::Opcode::GlobalAtomicAnd64:
+    case IR::Opcode::GlobalAtomicOr64:
+    case IR::Opcode::GlobalAtomicXor64:
+    case IR::Opcode::GlobalAtomicExchange64:
+    case IR::Opcode::GlobalAtomicAddF32:
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::GlobalAtomicAddF32x2:
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::GlobalAtomicMinF32x2:
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+    case IR::Opcode::GlobalAtomicMaxF32x2:
+        return ReplaceAtomic(block, inst, storage_index, offset);
     default:
         throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
     }
@@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
             CollectStorageBuffers(*block, inst, info);
         }
     }
-    u32 storage_index{};
     for (const StorageBufferAddr& storage_buffer : info.set) {
         program.info.storage_buffers_descriptors.push_back({
             .cbuf_index = storage_buffer.index,
@@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
             .count = 1,
             .is_written{info.writes.contains(storage_buffer)},
         });
-        ++storage_index;
     }
     for (const StorageInst& storage_inst : info.to_replace) {
         const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 52576b07fc..62e73d52d9 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) {
         return IR::Opcode::ConvertF32U32;
     case IR::Opcode::ConvertF16U64:
         return IR::Opcode::ConvertF32U64;
+    case IR::Opcode::GlobalAtomicAddF16x2:
+        return IR::Opcode::GlobalAtomicAddF32x2;
+    case IR::Opcode::StorageAtomicAddF16x2:
+        return IR::Opcode::StorageAtomicAddF32x2;
+    case IR::Opcode::GlobalAtomicMinF16x2:
+        return IR::Opcode::GlobalAtomicMinF32x2;
+    case IR::Opcode::StorageAtomicMinF16x2:
+        return IR::Opcode::StorageAtomicMinF32x2;
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+        return IR::Opcode::GlobalAtomicMaxF32x2;
+    case IR::Opcode::StorageAtomicMaxF16x2:
+        return IR::Opcode::StorageAtomicMaxF32x2;
     default:
         return op;
     }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index f0d68d5163..a4e41bda1d 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -38,6 +38,7 @@ struct Profile {
     bool support_viewport_index_layer_non_geometry{};
     bool support_typeless_image_loads{};
     bool warp_size_potentially_larger_than_guest{};
+    bool support_int64_atomics{};
 
     // FClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 3fbe992687..7bcecf5549 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -128,6 +128,19 @@ struct Info {
     bool uses_subgroup_mask{};
     bool uses_fswzadd{};
     bool uses_typeless_image_reads{};
+    bool uses_shared_increment{};
+    bool uses_shared_decrement{};
+    bool uses_global_increment{};
+    bool uses_global_decrement{};
+    bool uses_atomic_f32_add{};
+    bool uses_atomic_f16x2_add{};
+    bool uses_atomic_f16x2_min{};
+    bool uses_atomic_f16x2_max{};
+    bool uses_atomic_f32x2_add{};
+    bool uses_atomic_f32x2_min{};
+    bool uses_atomic_f32x2_max{};
+    bool uses_64_bit_atomics{};
+    bool uses_shared_memory_u32x2{};
 
     IR::Type used_constant_buffer_types{};
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f699a9bdfe..b953d694b0 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
             device.IsExtShaderViewportIndexLayerSupported(),
         .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
         .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
+        .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
         .generic_input_types{},
         .fixed_state_point_size{},
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 78bb741bcc..911dfed440 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -681,6 +681,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
     bool has_ext_transform_feedback{};
     bool has_ext_custom_border_color{};
     bool has_ext_extended_dynamic_state{};
+    bool has_ext_shader_atomic_int64{};
     for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
         const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
                               bool push) {
@@ -710,6 +711,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
         test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
         test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
         test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
+        test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
         test(has_khr_workgroup_memory_explicit_layout,
              VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
         if (Settings::values.renderer_debug) {
@@ -760,6 +762,18 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
     } else {
         is_warp_potentially_bigger = true;
     }
+    if (has_ext_shader_atomic_int64) {
+        VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
+        atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR;
+        atomic_int64.pNext = nullptr;
+        features.pNext = &atomic_int64;
+        physical.GetFeatures2KHR(features);
+
+        if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
+            extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
+            ext_shader_atomic_int64 = true;
+        }
+    }
     if (has_ext_transform_feedback) {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
         tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index adf62a707e..4e6d133087 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -229,6 +229,11 @@ public:
         return ext_shader_stencil_export;
     }
 
+    /// Returns true if the device supports VK_KHR_shader_atomic_int64.
+    bool IsExtShaderAtomicInt64Supported() const {
+        return ext_shader_atomic_int64;
+    }
+
     /// Returns true when a known debugging tool is attached.
     bool HasDebuggingToolAttached() const {
         return has_renderdoc || has_nsight_graphics;
@@ -320,6 +325,7 @@ private:
     bool ext_custom_border_color{};         ///< Support for VK_EXT_custom_border_color.
     bool ext_extended_dynamic_state{};      ///< Support for VK_EXT_extended_dynamic_state.
     bool ext_shader_stencil_export{};       ///< Support for VK_EXT_shader_stencil_export.
+    bool ext_shader_atomic_int64{};         ///< Support for VK_KHR_shader_atomic_int64.
     bool nv_device_diagnostics_config{};    ///< Support for VK_NV_device_diagnostics_config.
     bool has_renderdoc{};                   ///< Has RenderDoc attached
     bool has_nsight_graphics{};             ///< Has Nsight Graphics attached
-- 
cgit v1.2.3-70-g09d2


From a6cef71cc0b03f929f1bc97152b302562f46bc53 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 12 Apr 2021 03:48:15 -0300
Subject: shader: Implement OUT

---
 src/shader_recompiler/CMakeLists.txt               |  1 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  2 +
 .../backend/spirv/emit_spirv_special.cpp           |  8 ++++
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  8 ++++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  3 ++
 .../frontend/ir/microinstruction.cpp               |  2 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |  2 +
 .../translate/impl/load_store_attribute.cpp        |  7 +---
 .../maxwell/translate/impl/not_implemented.cpp     | 12 ------
 .../maxwell/translate/impl/output_geometry.cpp     | 45 ++++++++++++++++++++++
 10 files changed, 73 insertions(+), 17 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 8c24c1377d..bbbfa98a3f 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -134,6 +134,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/move_register_to_predicate.cpp
     frontend/maxwell/translate/impl/move_special_register.cpp
     frontend/maxwell/translate/impl/not_implemented.cpp
+    frontend/maxwell/translate/impl/output_geometry.cpp
     frontend/maxwell/translate/impl/predicate_set_predicate.cpp
     frontend/maxwell/translate/impl/predicate_set_register.cpp
     frontend/maxwell/translate/impl/select_source_with_predicate.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 150477ff67..4400752126 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -34,6 +34,8 @@ void EmitMemoryBarrierDeviceLevel(EmitContext& ctx);
 void EmitMemoryBarrierSystemLevel(EmitContext& ctx);
 void EmitPrologue(EmitContext& ctx);
 void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, Id stream);
+void EmitEndPrimitive(EmitContext& ctx, Id stream);
 void EmitGetRegister(EmitContext& ctx);
 void EmitSetRegister(EmitContext& ctx);
 void EmitGetPred(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
index 5f80c189f5..d20f4def33 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -36,4 +36,12 @@ void EmitEpilogue(EmitContext& ctx) {
     }
 }
 
+void EmitEmitVertex(EmitContext& ctx, Id stream) {
+    ctx.OpEmitStreamVertex(stream);
+}
+
+void EmitEndPrimitive(EmitContext& ctx, Id stream) {
+    ctx.OpEndStreamPrimitive(stream);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 54a273a921..7d48fa1ba6 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -125,6 +125,14 @@ void IREmitter::Epilogue() {
     Inst(Opcode::Epilogue);
 }
 
+void IREmitter::EmitVertex(const U32& stream) {
+    Inst(Opcode::EmitVertex, stream);
+}
+
+void IREmitter::EndPrimitive(const U32& stream) {
+    Inst(Opcode::EndPrimitive, stream);
+}
+
 U32 IREmitter::GetReg(IR::Reg reg) {
     return Inst<U32>(Opcode::GetRegister, reg);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index d04224707a..033c4332e4 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -43,6 +43,9 @@ public:
     void Prologue();
     void Epilogue();
 
+    void EmitVertex(const U32& stream);
+    void EndPrimitive(const U32& stream);
+
     [[nodiscard]] U32 GetReg(IR::Reg reg);
     void SetReg(IR::Reg reg, const U32& value);
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 0f66c56270..204c55fa85 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -69,6 +69,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::MemoryBarrierSystemLevel:
     case Opcode::Prologue:
     case Opcode::Epilogue:
+    case Opcode::EmitVertex:
+    case Opcode::EndPrimitive:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
     case Opcode::SetFragColor:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index f700086822..0e487f1a78 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -25,6 +25,8 @@ OPCODE(MemoryBarrierSystemLevel,                            Void,
 // Special operations
 OPCODE(Prologue,                                            Void,                                                                                           )
 OPCODE(Epilogue,                                            Void,                                                                                           )
+OPCODE(EmitVertex,                                          Void,           U32,                                                                            )
+OPCODE(EndPrimitive,                                        Void,           U32,                                                                            )
 
 // Context getters/setters
 OPCODE(GetRegister,                                         U32,            Reg,                                                                            )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
index f629e71679..79293bd6b3 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -64,7 +64,7 @@ void TranslatorVisitor::ALD(u64 insn) {
         BitField<8, 8, IR::Reg> index_reg;
         BitField<20, 10, u64> absolute_offset;
         BitField<20, 11, s64> relative_offset;
-        BitField<39, 8, IR::Reg> stream_reg;
+        BitField<39, 8, IR::Reg> array_reg;
         BitField<32, 1, u64> o;
         BitField<31, 1, u64> patch;
         BitField<47, 2, Size> size;
@@ -100,16 +100,13 @@ void TranslatorVisitor::AST(u64 insn) {
         BitField<20, 10, u64> absolute_offset;
         BitField<20, 11, s64> relative_offset;
         BitField<31, 1, u64> patch;
-        BitField<39, 8, IR::Reg> stream_reg;
+        BitField<39, 8, IR::Reg> array_reg;
         BitField<47, 2, Size> size;
     } const ast{insn};
 
     if (ast.patch != 0) {
         throw NotImplementedException("P");
     }
-    if (ast.stream_reg != IR::Reg::RZ) {
-        throw NotImplementedException("Stream store");
-    }
     if (ast.index_reg != IR::Reg::RZ) {
         throw NotImplementedException("Indexed store");
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 694bdfccb1..a45d1e4be5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -169,18 +169,6 @@ void TranslatorVisitor::NOP(u64) {
     // NOP is No-Op.
 }
 
-void TranslatorVisitor::OUT_reg(u64) {
-    ThrowNotImplemented(Opcode::OUT_reg);
-}
-
-void TranslatorVisitor::OUT_cbuf(u64) {
-    ThrowNotImplemented(Opcode::OUT_cbuf);
-}
-
-void TranslatorVisitor::OUT_imm(u64) {
-    ThrowNotImplemented(Opcode::OUT_imm);
-}
-
 void TranslatorVisitor::PBK() {
     // PBK is a no-op
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 0000000000..01cfad88de
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> output_reg; // Not needed on host
+        BitField<39, 1, u64> emit;
+        BitField<40, 1, u64> cut;
+    } const out{insn};
+
+    stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
+
+    if (out.emit != 0) {
+        v.ir.EmitVertex(stream_index);
+    }
+    if (out.cut != 0) {
+        v.ir.EndPrimitive(stream_index);
+    }
+    // Host doesn't need the output register, but we can write to it to avoid undefined reads
+    v.X(out.dest_reg, v.ir.Imm32(0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::OUT_reg(u64 insn) {
+    OUT(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::OUT_cbuf(u64 insn) {
+    OUT(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::OUT_imm(u64 insn) {
+    OUT(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
-- 
cgit v1.2.3-70-g09d2


From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 15 Apr 2021 22:46:11 -0300
Subject: shader: Implement tessellation shaders, polygon mode and invocation
 id

---
 src/shader_recompiler/CMakeLists.txt               |   2 +
 .../backend/spirv/emit_context.cpp                 | 147 ++++++++++++++------
 src/shader_recompiler/backend/spirv/emit_context.h |  10 +-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  39 ++++++
 src/shader_recompiler/backend/spirv/emit_spirv.h   |   3 +
 .../backend/spirv/emit_spirv_context_get_set.cpp   |  88 ++++++++++--
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  12 ++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |   4 +
 .../frontend/ir/microinstruction.cpp               |   1 +
 src/shader_recompiler/frontend/ir/opcodes.cpp      |   1 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |   3 +
 src/shader_recompiler/frontend/ir/patch.cpp        |  28 ++++
 src/shader_recompiler/frontend/ir/patch.h          | 149 +++++++++++++++++++++
 src/shader_recompiler/frontend/ir/type.h           |  41 +++---
 src/shader_recompiler/frontend/ir/value.cpp        |   9 ++
 src/shader_recompiler/frontend/ir/value.h          |   4 +
 src/shader_recompiler/frontend/maxwell/program.cpp |   5 +
 .../translate/impl/load_store_attribute.cpp        |  33 +++--
 .../translate/impl/move_special_register.cpp       |   2 +
 .../ir_opt/collect_shader_info_pass.cpp            |  41 ++++++
 src/shader_recompiler/profile.h                    |  16 +++
 src/shader_recompiler/shader_info.h                |   5 +
 src/video_core/renderer_vulkan/maxwell_to_vk.cpp   |  13 ++
 src/video_core/renderer_vulkan/maxwell_to_vk.h     |   2 +
 .../renderer_vulkan/vk_graphics_pipeline.cpp       |   3 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp          |  30 +++++
 .../renderer_vulkan/vk_staging_buffer_pool.cpp     |   2 +-
 src/video_core/vulkan_common/vulkan_device.cpp     |   3 +-
 28 files changed, 605 insertions(+), 91 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/ir/patch.cpp
 create mode 100644 src/shader_recompiler/frontend/ir/patch.h

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index bbbfa98a3f..7c11d15bfd 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -41,6 +41,8 @@ add_library(shader_recompiler STATIC
     frontend/ir/opcodes.cpp
     frontend/ir/opcodes.h
     frontend/ir/opcodes.inc
+    frontend/ir/patch.cpp
+    frontend/ir/patch.h
     frontend/ir/post_order.cpp
     frontend/ir/post_order.h
     frontend/ir/pred.h
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 032cf5e03e..067f616137 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -125,19 +125,36 @@ u32 NumVertices(InputTopology input_topology) {
     throw InvalidArgument("Invalid input topology {}", input_topology);
 }
 
-Id DefineInput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin = std::nullopt) {
-    if (ctx.stage == Stage::Geometry) {
-        const u32 num_vertices{NumVertices(ctx.profile.input_topology)};
-        type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices));
+Id DefineInput(EmitContext& ctx, Id type, bool per_invocation,
+               std::optional<spv::BuiltIn> builtin = std::nullopt) {
+    switch (ctx.stage) {
+    case Stage::TessellationControl:
+    case Stage::TessellationEval:
+        if (per_invocation) {
+            type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 32u));
+        }
+        break;
+    case Stage::Geometry:
+        if (per_invocation) {
+            const u32 num_vertices{NumVertices(ctx.profile.input_topology)};
+            type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices));
+        }
+        break;
+    default:
+        break;
     }
     return DefineVariable(ctx, type, builtin, spv::StorageClass::Input);
 }
 
-Id DefineOutput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin = std::nullopt) {
+Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations,
+                std::optional<spv::BuiltIn> builtin = std::nullopt) {
+    if (invocations && ctx.stage == Stage::TessellationControl) {
+        type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], *invocations));
+    }
     return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
 }
 
-void DefineGenericOutput(EmitContext& ctx, size_t index) {
+void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) {
     static constexpr std::string_view swizzle{"xyzw"};
     const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
     u32 element{0};
@@ -150,7 +167,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index) {
         }
         const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
 
-        const Id id{DefineOutput(ctx, ctx.F32[num_components])};
+        const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)};
         ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
         if (element > 0) {
             ctx.Decorate(id, spv::Decoration::Component, element);
@@ -161,10 +178,10 @@ void DefineGenericOutput(EmitContext& ctx, size_t index) {
             ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset);
         }
         if (num_components < 4 || element > 0) {
-            ctx.Name(id, fmt::format("out_attr{}", index));
-        } else {
             const std::string_view subswizzle{swizzle.substr(element, num_components)};
             ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle));
+        } else {
+            ctx.Name(id, fmt::format("out_attr{}", index));
         }
         const GenericElementInfo info{
             .id = id,
@@ -383,7 +400,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
-    DefineInterfaces(program.info);
+    DefineInterfaces(program);
     DefineLocalMemory(program);
     DefineSharedMemory(program);
     DefineSharedMemoryFunctions(program);
@@ -472,9 +489,9 @@ void EmitContext::DefineCommonConstants() {
     f32_zero_value = Constant(F32[1], 0.0f);
 }
 
-void EmitContext::DefineInterfaces(const Info& info) {
-    DefineInputs(info);
-    DefineOutputs(info);
+void EmitContext::DefineInterfaces(const IR::Program& program) {
+    DefineInputs(program.info);
+    DefineOutputs(program);
 }
 
 void EmitContext::DefineLocalMemory(const IR::Program& program) {
@@ -972,26 +989,29 @@ void EmitContext::DefineLabels(IR::Program& program) {
 
 void EmitContext::DefineInputs(const Info& info) {
     if (info.uses_workgroup_id) {
-        workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId);
+        workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
     }
     if (info.uses_local_invocation_id) {
-        local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId);
+        local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId);
+    }
+    if (info.uses_invocation_id) {
+        invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId);
     }
     if (info.uses_is_helper_invocation) {
-        is_helper_invocation = DefineInput(*this, U1, spv::BuiltIn::HelperInvocation);
+        is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
     }
     if (info.uses_subgroup_mask) {
-        subgroup_mask_eq = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupEqMaskKHR);
-        subgroup_mask_lt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLtMaskKHR);
-        subgroup_mask_le = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLeMaskKHR);
-        subgroup_mask_gt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGtMaskKHR);
-        subgroup_mask_ge = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGeMaskKHR);
+        subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
+        subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
+        subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
+        subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
+        subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
     }
     if (info.uses_subgroup_invocation_id ||
         (profile.warp_size_potentially_larger_than_guest &&
          (info.uses_subgroup_vote || info.uses_subgroup_mask))) {
         subgroup_local_invocation_id =
-            DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId);
+            DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
     }
     if (info.uses_fswzadd) {
         const Id f32_one{Constant(F32[1], 1.0f)};
@@ -1004,29 +1024,32 @@ void EmitContext::DefineInputs(const Info& info) {
     if (info.loads_position) {
         const bool is_fragment{stage != Stage::Fragment};
         const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
-        input_position = DefineInput(*this, F32[4], built_in);
+        input_position = DefineInput(*this, F32[4], true, built_in);
     }
     if (info.loads_instance_id) {
         if (profile.support_vertex_instance_id) {
-            instance_id = DefineInput(*this, U32[1], spv::BuiltIn::InstanceId);
+            instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
         } else {
-            instance_index = DefineInput(*this, U32[1], spv::BuiltIn::InstanceIndex);
-            base_instance = DefineInput(*this, U32[1], spv::BuiltIn::BaseInstance);
+            instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
+            base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
         }
     }
     if (info.loads_vertex_id) {
         if (profile.support_vertex_instance_id) {
-            vertex_id = DefineInput(*this, U32[1], spv::BuiltIn::VertexId);
+            vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
         } else {
-            vertex_index = DefineInput(*this, U32[1], spv::BuiltIn::VertexIndex);
-            base_vertex = DefineInput(*this, U32[1], spv::BuiltIn::BaseVertex);
+            vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
+            base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
         }
     }
     if (info.loads_front_face) {
-        front_face = DefineInput(*this, U1, spv::BuiltIn::FrontFacing);
+        front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
     }
     if (info.loads_point_coord) {
-        point_coord = DefineInput(*this, F32[2], spv::BuiltIn::PointCoord);
+        point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord);
+    }
+    if (info.loads_tess_coord) {
+        tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
     }
     for (size_t index = 0; index < info.input_generics.size(); ++index) {
         const InputVarying generic{info.input_generics[index]};
@@ -1038,7 +1061,7 @@ void EmitContext::DefineInputs(const Info& info) {
             continue;
         }
         const Id type{GetAttributeType(*this, input_type)};
-        const Id id{DefineInput(*this, type)};
+        const Id id{DefineInput(*this, type, true)};
         Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
         Name(id, fmt::format("in_attr{}", index));
         input_generics[index] = id;
@@ -1059,58 +1082,98 @@ void EmitContext::DefineInputs(const Info& info) {
             break;
         }
     }
+    if (stage == Stage::TessellationEval) {
+        for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+            if (!info.uses_patches[index]) {
+                continue;
+            }
+            const Id id{DefineInput(*this, F32[4], false)};
+            Decorate(id, spv::Decoration::Patch);
+            Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+            patches[index] = id;
+        }
+    }
 }
 
-void EmitContext::DefineOutputs(const Info& info) {
+void EmitContext::DefineOutputs(const IR::Program& program) {
+    const Info& info{program.info};
+    const std::optional<u32> invocations{program.invocations};
     if (info.stores_position || stage == Stage::VertexB) {
-        output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position);
+        output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position);
     }
     if (info.stores_point_size || profile.fixed_state_point_size) {
         if (stage == Stage::Fragment) {
             throw NotImplementedException("Storing PointSize in fragment stage");
         }
-        output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize);
+        output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize);
     }
     if (info.stores_clip_distance) {
         if (stage == Stage::Fragment) {
             throw NotImplementedException("Storing ClipDistance in fragment stage");
         }
         const Id type{TypeArray(F32[1], Constant(U32[1], 8U))};
-        clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance);
+        clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance);
     }
     if (info.stores_layer &&
         (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
         if (stage == Stage::Fragment) {
             throw NotImplementedException("Storing Layer in fragment stage");
         }
-        layer = DefineOutput(*this, U32[1], spv::BuiltIn::Layer);
+        layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer);
     }
     if (info.stores_viewport_index &&
         (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
         if (stage == Stage::Fragment) {
             throw NotImplementedException("Storing ViewportIndex in fragment stage");
         }
-        viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex);
+        viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex);
     }
     for (size_t index = 0; index < info.stores_generics.size(); ++index) {
         if (info.stores_generics[index]) {
-            DefineGenericOutput(*this, index);
+            DefineGenericOutput(*this, index, invocations);
         }
     }
-    if (stage == Stage::Fragment) {
+    switch (stage) {
+    case Stage::TessellationControl:
+        if (info.stores_tess_level_outer) {
+            const Id type{TypeArray(F32[1], Constant(U32[1], 4))};
+            output_tess_level_outer =
+                DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter);
+            Decorate(output_tess_level_outer, spv::Decoration::Patch);
+        }
+        if (info.stores_tess_level_inner) {
+            const Id type{TypeArray(F32[1], Constant(U32[1], 2))};
+            output_tess_level_inner =
+                DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner);
+            Decorate(output_tess_level_inner, spv::Decoration::Patch);
+        }
+        for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+            if (!info.uses_patches[index]) {
+                continue;
+            }
+            const Id id{DefineOutput(*this, F32[4], std::nullopt)};
+            Decorate(id, spv::Decoration::Patch);
+            Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+            patches[index] = id;
+        }
+        break;
+    case Stage::Fragment:
         for (u32 index = 0; index < 8; ++index) {
             if (!info.stores_frag_color[index]) {
                 continue;
             }
-            frag_color[index] = DefineOutput(*this, F32[4]);
+            frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
             Decorate(frag_color[index], spv::Decoration::Location, index);
             Name(frag_color[index], fmt::format("frag_color{}", index));
         }
         if (info.stores_frag_depth) {
-            frag_depth = DefineOutput(*this, F32[1]);
+            frag_depth = DefineOutput(*this, F32[1], std::nullopt);
             Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
             Name(frag_depth, "frag_depth");
         }
+        break;
+    default:
+        break;
     }
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 0da14d5f8e..ba0a253b35 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -147,6 +147,7 @@ public:
 
     Id workgroup_id{};
     Id local_invocation_id{};
+    Id invocation_id{};
     Id is_helper_invocation{};
     Id subgroup_local_invocation_id{};
     Id subgroup_mask_eq{};
@@ -162,6 +163,7 @@ public:
     Id base_vertex{};
     Id front_face{};
     Id point_coord{};
+    Id tess_coord{};
     Id clip_distances{};
     Id layer{};
     Id viewport_index{};
@@ -204,6 +206,10 @@ public:
     Id output_position{};
     std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
 
+    Id output_tess_level_outer{};
+    Id output_tess_level_inner{};
+    std::array<Id, 30> patches{};
+
     std::array<Id, 8> frag_color{};
     Id frag_depth{};
 
@@ -212,7 +218,7 @@ public:
 private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
-    void DefineInterfaces(const Info& info);
+    void DefineInterfaces(const IR::Program& program);
     void DefineLocalMemory(const IR::Program& program);
     void DefineSharedMemory(const IR::Program& program);
     void DefineSharedMemoryFunctions(const IR::Program& program);
@@ -226,7 +232,7 @@ private:
     void DefineLabels(IR::Program& program);
 
     void DefineInputs(const Info& info);
-    void DefineOutputs(const Info& info);
+    void DefineOutputs(const IR::Program& program);
 };
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 3bf4c6a9ec..105602ccf5 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -45,6 +45,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
         return arg.Label();
     } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
         return arg.Attribute();
+    } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+        return arg.Patch();
     } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
         return arg.Reg();
     }
@@ -120,6 +122,30 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
     return main;
 }
 
+spv::ExecutionMode ExecutionMode(TessPrimitive primitive) {
+    switch (primitive) {
+    case TessPrimitive::Isolines:
+        return spv::ExecutionMode::Isolines;
+    case TessPrimitive::Triangles:
+        return spv::ExecutionMode::Triangles;
+    case TessPrimitive::Quads:
+        return spv::ExecutionMode::Quads;
+    }
+    throw InvalidArgument("Tessellation primitive {}", primitive);
+}
+
+spv::ExecutionMode ExecutionMode(TessSpacing spacing) {
+    switch (spacing) {
+    case TessSpacing::Equal:
+        return spv::ExecutionMode::SpacingEqual;
+    case TessSpacing::FractionalOdd:
+        return spv::ExecutionMode::SpacingFractionalOdd;
+    case TessSpacing::FractionalEven:
+        return spv::ExecutionMode::SpacingFractionalEven;
+    }
+    throw InvalidArgument("Tessellation spacing {}", spacing);
+}
+
 void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
     const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
     spv::ExecutionModel execution_model{};
@@ -134,6 +160,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
     case Stage::VertexB:
         execution_model = spv::ExecutionModel::Vertex;
         break;
+    case Stage::TessellationControl:
+        execution_model = spv::ExecutionModel::TessellationControl;
+        ctx.AddCapability(spv::Capability::Tessellation);
+        ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations);
+        break;
+    case Stage::TessellationEval:
+        execution_model = spv::ExecutionModel::TessellationEvaluation;
+        ctx.AddCapability(spv::Capability::Tessellation);
+        ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_primitive));
+        ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_spacing));
+        ctx.AddExecutionMode(main, ctx.profile.tess_clockwise ? spv::ExecutionMode::VertexOrderCw
+                                                              : spv::ExecutionMode::VertexOrderCcw);
+        break;
     case Stage::Geometry:
         execution_model = spv::ExecutionModel::Geometry;
         ctx.AddCapability(spv::Capability::Geometry);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 55b2edba0c..8caf30f1b0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -55,6 +55,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
 Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
 void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
 void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
 void EmitSetFragDepth(EmitContext& ctx, Id value);
 void EmitGetZFlag(EmitContext& ctx);
@@ -67,6 +69,7 @@ void EmitSetCFlag(EmitContext& ctx);
 void EmitSetOFlag(EmitContext& ctx);
 Id EmitWorkgroupId(EmitContext& ctx);
 Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitInvocationId(EmitContext& ctx);
 Id EmitIsHelperInvocation(EmitContext& ctx);
 Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
 void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 59c56c5ba8..4a1aeece5a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -32,13 +32,26 @@ std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
 
 template <typename... Args>
 Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
-    if (ctx.stage == Stage::Geometry) {
+    switch (ctx.stage) {
+    case Stage::TessellationControl:
+    case Stage::TessellationEval:
+    case Stage::Geometry:
         return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...);
-    } else {
+    default:
         return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...);
     }
 }
 
+template <typename... Args>
+Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
+    if (ctx.stage == Stage::TessellationControl) {
+        const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)};
+        return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...);
+    } else {
+        return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...);
+    }
+}
+
 std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
     if (IR::IsGeneric(attr)) {
         const u32 index{IR::GenericAttributeIndex(attr)};
@@ -49,7 +62,7 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
         } else {
             const u32 index_element{element - info.first_element};
             const Id index_id{ctx.Constant(ctx.U32[1], index_element)};
-            return ctx.OpAccessChain(ctx.output_f32, info.id, index_id);
+            return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
         }
     }
     switch (attr) {
@@ -61,7 +74,7 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
     case IR::Attribute::PositionW: {
         const u32 element{static_cast<u32>(attr) % 4};
         const Id element_id{ctx.Constant(ctx.U32[1], element)};
-        return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id);
+        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
     }
     case IR::Attribute::ClipDistance0:
     case IR::Attribute::ClipDistance1:
@@ -74,7 +87,7 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
         const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
         const u32 index{static_cast<u32>(attr) - base};
         const Id clip_num{ctx.Constant(ctx.U32[1], index)};
-        return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
+        return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num);
     }
     case IR::Attribute::Layer:
         return ctx.profile.support_viewport_index_layer_non_geometry ||
@@ -222,11 +235,17 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
                             ctx.Constant(ctx.U32[1], std::numeric_limits<u32>::max()),
                             ctx.u32_zero_value);
     case IR::Attribute::PointSpriteS:
-        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord,
-                                                  ctx.u32_zero_value));
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));
     case IR::Attribute::PointSpriteT:
-        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord,
-                                                  ctx.Constant(ctx.U32[1], 1U)));
+        return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord,
+                                                        ctx.Constant(ctx.U32[1], 1U)));
+    case IR::Attribute::TessellationEvaluationPointU:
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
+    case IR::Attribute::TessellationEvaluationPointV:
+        return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord,
+                                                        ctx.Constant(ctx.U32[1], 1U)));
     default:
         throw NotImplementedException("Read attribute {}", attr);
     }
@@ -240,9 +260,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_un
 }
 
 Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) {
-    if (ctx.stage == Stage::Geometry) {
+    switch (ctx.stage) {
+    case Stage::TessellationControl:
+    case Stage::TessellationEval:
+    case Stage::Geometry:
         return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex);
-    } else {
+    default:
         return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset);
     }
 }
@@ -251,6 +274,45 @@ void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unus
     ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);
 }
 
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
+    if (!IR::IsGeneric(patch)) {
+        throw NotImplementedException("Non-generic patch load");
+    }
+    const u32 index{IR::GenericPatchIndex(patch)};
+    const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))};
+    const Id pointer{ctx.OpAccessChain(ctx.input_f32, ctx.patches.at(index), element)};
+    return ctx.OpLoad(ctx.F32[1], pointer);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
+    const Id pointer{[&] {
+        if (IR::IsGeneric(patch)) {
+            const u32 index{IR::GenericPatchIndex(patch)};
+            const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))};
+            return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
+        }
+        switch (patch) {
+        case IR::Patch::TessellationLodLeft:
+        case IR::Patch::TessellationLodRight:
+        case IR::Patch::TessellationLodTop:
+        case IR::Patch::TessellationLodBottom: {
+            const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+            const Id index_id{ctx.Constant(ctx.U32[1], index)};
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
+        }
+        case IR::Patch::TessellationLodInteriorU:
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
+                                     ctx.u32_zero_value);
+        case IR::Patch::TessellationLodInteriorV:
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
+                                     ctx.Constant(ctx.U32[1], 1u));
+        default:
+            throw NotImplementedException("Patch {}", patch);
+        }
+    }()};
+    ctx.OpStore(pointer, value);
+}
+
 void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
     const Id component_id{ctx.Constant(ctx.U32[1], component)};
     const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
@@ -301,6 +363,10 @@ Id EmitLocalInvocationId(EmitContext& ctx) {
     return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
 }
 
+Id EmitInvocationId(EmitContext& ctx) {
+    return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
+}
+
 Id EmitIsHelperInvocation(EmitContext& ctx) {
     return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index d66eb17a6e..b821d9f476 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -331,6 +331,14 @@ void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, c
     Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex);
 }
 
+F32 IREmitter::GetPatch(Patch patch) {
+    return Inst<F32>(Opcode::GetPatch, patch);
+}
+
+void IREmitter::SetPatch(Patch patch, const F32& value) {
+    Inst(Opcode::SetPatch, patch, value);
+}
+
 void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
     Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
 }
@@ -363,6 +371,10 @@ U32 IREmitter::LocalInvocationIdZ() {
     return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)};
 }
 
+U32 IREmitter::InvocationId() {
+    return Inst<U32>(Opcode::InvocationId);
+}
+
 U1 IREmitter::IsHelperInvocation() {
     return Inst<U1>(Opcode::IsHelperInvocation);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index e70359eb11..7f8f1ad426 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -84,6 +84,9 @@ public:
     [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex);
     void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);
 
+    [[nodiscard]] F32 GetPatch(Patch patch);
+    void SetPatch(Patch patch, const F32& value);
+
     void SetFragColor(u32 index, u32 component, const F32& value);
     void SetFragDepth(const F32& value);
 
@@ -95,6 +98,7 @@ public:
     [[nodiscard]] U32 LocalInvocationIdY();
     [[nodiscard]] U32 LocalInvocationIdZ();
 
+    [[nodiscard]] U32 InvocationId();
     [[nodiscard]] U1 IsHelperInvocation();
 
     [[nodiscard]] U32 LaneId();
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 204c55fa85..b2d7573d99 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -73,6 +73,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::EndPrimitive:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
+    case Opcode::SetPatch:
     case Opcode::SetFragColor:
     case Opcode::SetFragDepth:
     case Opcode::WriteGlobalU8:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
index 7d3e0b2ab5..7f04b647b0 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.cpp
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -24,6 +24,7 @@ constexpr Type Label{Type::Label};
 constexpr Type Reg{Type::Reg};
 constexpr Type Pred{Type::Pred};
 constexpr Type Attribute{Type::Attribute};
+constexpr Type Patch{Type::Patch};
 constexpr Type U1{Type::U1};
 constexpr Type U8{Type::U8};
 constexpr Type U16{Type::U16};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 7a21fe7465..a86542cd8d 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -48,6 +48,8 @@ OPCODE(GetAttribute,                                        F32,            Attr
 OPCODE(SetAttribute,                                        Void,           Attribute,      F32,            U32,                                            )
 OPCODE(GetAttributeIndexed,                                 F32,            U32,            U32,                                                            )
 OPCODE(SetAttributeIndexed,                                 Void,           U32,            F32,            U32,                                            )
+OPCODE(GetPatch,                                            F32,            Patch,                                                                          )
+OPCODE(SetPatch,                                            Void,           Patch,          F32,                                                            )
 OPCODE(SetFragColor,                                        Void,           U32,            U32,            F32,                                            )
 OPCODE(SetFragDepth,                                        Void,           F32,                                                                            )
 OPCODE(GetZFlag,                                            U1,             Void,                                                                           )
@@ -60,6 +62,7 @@ OPCODE(SetCFlag,                                            Void,           U1,
 OPCODE(SetOFlag,                                            Void,           U1,                                                                             )
 OPCODE(WorkgroupId,                                         U32x3,                                                                                          )
 OPCODE(LocalInvocationId,                                   U32x3,                                                                                          )
+OPCODE(InvocationId,                                        U32,                                                                                            )
 OPCODE(IsHelperInvocation,                                  U1,                                                                                             )
 
 // Undefined
diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp
new file mode 100644
index 0000000000..1f770bc488
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.cpp
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/patch.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+
+bool IsGeneric(Patch patch) noexcept {
+    return patch >= Patch::Component0 && patch <= Patch::Component119;
+}
+
+u32 GenericPatchIndex(Patch patch) {
+    if (!IsGeneric(patch)) {
+        throw InvalidArgument("Patch {} is not generic", patch);
+    }
+    return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
+}
+
+u32 GenericPatchElement(Patch patch) {
+    if (!IsGeneric(patch)) {
+        throw InvalidArgument("Patch {} is not generic", patch);
+    }
+    return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h
new file mode 100644
index 0000000000..6d66ff0d6c
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.h
@@ -0,0 +1,149 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+
+enum class Patch : u64 {
+    TessellationLodLeft,
+    TessellationLodTop,
+    TessellationLodRight,
+    TessellationLodBottom,
+    TessellationLodInteriorU,
+    TessellationLodInteriorV,
+    ComponentPadding0,
+    ComponentPadding1,
+    Component0,
+    Component1,
+    Component2,
+    Component3,
+    Component4,
+    Component5,
+    Component6,
+    Component7,
+    Component8,
+    Component9,
+    Component10,
+    Component11,
+    Component12,
+    Component13,
+    Component14,
+    Component15,
+    Component16,
+    Component17,
+    Component18,
+    Component19,
+    Component20,
+    Component21,
+    Component22,
+    Component23,
+    Component24,
+    Component25,
+    Component26,
+    Component27,
+    Component28,
+    Component29,
+    Component30,
+    Component31,
+    Component32,
+    Component33,
+    Component34,
+    Component35,
+    Component36,
+    Component37,
+    Component38,
+    Component39,
+    Component40,
+    Component41,
+    Component42,
+    Component43,
+    Component44,
+    Component45,
+    Component46,
+    Component47,
+    Component48,
+    Component49,
+    Component50,
+    Component51,
+    Component52,
+    Component53,
+    Component54,
+    Component55,
+    Component56,
+    Component57,
+    Component58,
+    Component59,
+    Component60,
+    Component61,
+    Component62,
+    Component63,
+    Component64,
+    Component65,
+    Component66,
+    Component67,
+    Component68,
+    Component69,
+    Component70,
+    Component71,
+    Component72,
+    Component73,
+    Component74,
+    Component75,
+    Component76,
+    Component77,
+    Component78,
+    Component79,
+    Component80,
+    Component81,
+    Component82,
+    Component83,
+    Component84,
+    Component85,
+    Component86,
+    Component87,
+    Component88,
+    Component89,
+    Component90,
+    Component91,
+    Component92,
+    Component93,
+    Component94,
+    Component95,
+    Component96,
+    Component97,
+    Component98,
+    Component99,
+    Component100,
+    Component101,
+    Component102,
+    Component103,
+    Component104,
+    Component105,
+    Component106,
+    Component107,
+    Component108,
+    Component109,
+    Component110,
+    Component111,
+    Component112,
+    Component113,
+    Component114,
+    Component115,
+    Component116,
+    Component117,
+    Component118,
+    Component119,
+};
+static_assert(static_cast<u64>(Patch::Component119) == 127);
+
+[[nodiscard]] bool IsGeneric(Patch patch) noexcept;
+
+[[nodiscard]] u32 GenericPatchIndex(Patch patch);
+
+[[nodiscard]] u32 GenericPatchElement(Patch patch);
+
+} // namespace Shader::IR
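
A quick sanity check of the index math (hypothetical standalone snippet, not part of the patch; it assumes only the declarations in patch.h):

    #include <cassert>
    #include "shader_recompiler/frontend/ir/patch.h"

    void PatchIndexExample() {
        using Shader::IR::Patch;
        // Component9 sits 9 slots past Component0 (raw value 8 + 9 = 17):
        // generic patch index 9 / 4 = 2, element 9 % 4 = 1.
        assert(Shader::IR::GenericPatchIndex(Patch::Component9) == 2);
        assert(Shader::IR::GenericPatchElement(Patch::Component9) == 1);
        // Component119 lands in patch 29, element 3, so 30 four-component
        // patch slots cover the whole generic range.
        assert(Shader::IR::GenericPatchIndex(Patch::Component119) == 29);
        assert(Shader::IR::GenericPatchElement(Patch::Component119) == 3);
    }
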
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
index 9a32ca1e8a..8b3b338528 100644
--- a/src/shader_recompiler/frontend/ir/type.h
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -20,26 +20,27 @@ enum class Type {
     Reg = 1 << 2,
     Pred = 1 << 3,
     Attribute = 1 << 4,
-    U1 = 1 << 5,
-    U8 = 1 << 6,
-    U16 = 1 << 7,
-    U32 = 1 << 8,
-    U64 = 1 << 9,
-    F16 = 1 << 10,
-    F32 = 1 << 11,
-    F64 = 1 << 12,
-    U32x2 = 1 << 13,
-    U32x3 = 1 << 14,
-    U32x4 = 1 << 15,
-    F16x2 = 1 << 16,
-    F16x3 = 1 << 17,
-    F16x4 = 1 << 18,
-    F32x2 = 1 << 19,
-    F32x3 = 1 << 20,
-    F32x4 = 1 << 21,
-    F64x2 = 1 << 22,
-    F64x3 = 1 << 23,
-    F64x4 = 1 << 24,
+    Patch = 1 << 5,
+    U1 = 1 << 6,
+    U8 = 1 << 7,
+    U16 = 1 << 8,
+    U32 = 1 << 9,
+    U64 = 1 << 10,
+    F16 = 1 << 11,
+    F32 = 1 << 12,
+    F64 = 1 << 13,
+    U32x2 = 1 << 14,
+    U32x3 = 1 << 15,
+    U32x4 = 1 << 16,
+    F16x2 = 1 << 17,
+    F16x3 = 1 << 18,
+    F16x4 = 1 << 19,
+    F32x2 = 1 << 20,
+    F32x3 = 1 << 21,
+    F32x4 = 1 << 22,
+    F64x2 = 1 << 23,
+    F64x3 = 1 << 24,
+    F64x4 = 1 << 25,
 };
 DECLARE_ENUM_FLAG_OPERATORS(Type)
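
Every enumerator stays a distinct bit, so adding Patch shifts each later value up by one, and sets of acceptable types can still be expressed as masks. A small sketch, assuming the constexpr bitwise operators that DECLARE_ENUM_FLAG_OPERATORS generates:

    using Shader::IR::Type;
    // Hypothetical "32-bit scalar" set built from the single-bit values:
    constexpr Type scalar32{Type::U32 | Type::F32};
    // A zero result (Type::Void) means the queried type is not in the set.
    static_assert((scalar32 & Type::F32) != Type::Void);
    static_assert((scalar32 & Type::Patch) == Type::Void);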
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 1e7ffb86d5..bf5f8c0c20 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -18,6 +18,8 @@ Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {}
 
 Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
 
+Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {}
+
 Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
 
 Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
@@ -109,6 +111,11 @@ IR::Attribute Value::Attribute() const {
     return attribute;
 }
 
+IR::Patch Value::Patch() const {
+    ValidateAccess(Type::Patch);
+    return patch;
+}
+
 bool Value::U1() const {
     if (IsIdentity()) {
         return inst->Arg(0).U1();
@@ -182,6 +189,8 @@ bool Value::operator==(const Value& other) const {
         return pred == other.pred;
     case Type::Attribute:
         return attribute == other.attribute;
+    case Type::Patch:
+        return patch == other.patch;
     case Type::U1:
         return imm_u1 == other.imm_u1;
     case Type::U8:
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index a0962863d8..3037455632 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -9,6 +9,7 @@
 #include "shader_recompiler/frontend/ir/attribute.h"
 #include "shader_recompiler/frontend/ir/pred.h"
 #include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/ir/patch.h"
 #include "shader_recompiler/frontend/ir/type.h"
 
 namespace Shader::IR {
@@ -24,6 +25,7 @@ public:
     explicit Value(IR::Reg value) noexcept;
     explicit Value(IR::Pred value) noexcept;
     explicit Value(IR::Attribute value) noexcept;
+    explicit Value(IR::Patch value) noexcept;
     explicit Value(bool value) noexcept;
     explicit Value(u8 value) noexcept;
     explicit Value(u16 value) noexcept;
@@ -46,6 +48,7 @@ public:
     [[nodiscard]] IR::Reg Reg() const;
     [[nodiscard]] IR::Pred Pred() const;
     [[nodiscard]] IR::Attribute Attribute() const;
+    [[nodiscard]] IR::Patch Patch() const;
     [[nodiscard]] bool U1() const;
     [[nodiscard]] u8 U8() const;
     [[nodiscard]] u16 U16() const;
@@ -67,6 +70,7 @@ private:
         IR::Reg reg;
         IR::Pred pred;
         IR::Attribute attribute;
+        IR::Patch patch;
         bool imm_u1;
         u8 imm_u8;
         u16 imm_u16;
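
With the constructor, typed accessor, and union member in place, a Patch round-trips through IR::Value like the other non-immediate payloads. A brief hypothetical usage sketch:

    const Shader::IR::Value value{Shader::IR::Patch::TessellationLodLeft};
    // value.Type() now reports Type::Patch, so the typed accessor succeeds:
    const Shader::IR::Patch patch{value.Patch()};
    // A mismatched accessor such as value.Reg() would throw instead,
    // since ValidateAccess(Type::Reg) fails for a Patch-typed value.
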
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index ab67446c80..20a1d61cc4 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -70,6 +70,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     program.stage = env.ShaderStage();
     program.local_memory_size = env.LocalMemorySize();
     switch (program.stage) {
+    case Stage::TessellationControl: {
+        const ProgramHeader& sph{env.SPH()};
+        program.invocations = sph.common2.threads_per_input_primitive;
+        break;
+    }
     case Stage::Geometry: {
         const ProgramHeader& sph{env.SPH()};
         program.output_topology = sph.common3.output_topology;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
index eb6a80de23..7d7dcc3cbd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -70,12 +70,6 @@ void TranslatorVisitor::ALD(u64 insn) {
         BitField<47, 2, Size> size;
     } const ald{insn};
 
-    if (ald.o != 0) {
-        throw NotImplementedException("O");
-    }
-    if (ald.patch != 0) {
-        throw NotImplementedException("P");
-    }
     const u64 offset{ald.absolute_offset.Value()};
     if (offset % 4 != 0) {
         throw NotImplementedException("Unaligned absolute offset {}", offset);
@@ -84,11 +78,19 @@ void TranslatorVisitor::ALD(u64 insn) {
     const u32 num_elements{NumElements(ald.size)};
     if (ald.index_reg == IR::Reg::RZ) {
         for (u32 element = 0; element < num_elements; ++element) {
-            const IR::Attribute attr{offset / 4 + element};
-            F(ald.dest_reg + element, ir.GetAttribute(attr, vertex));
+            if (ald.patch != 0) {
+                const IR::Patch patch{offset / 4 + element};
+                F(ald.dest_reg + element, ir.GetPatch(patch));
+            } else {
+                const IR::Attribute attr{offset / 4 + element};
+                F(ald.dest_reg + element, ir.GetAttribute(attr, vertex));
+            }
         }
         return;
     }
+    if (ald.patch != 0) {
+        throw NotImplementedException("Indirect patch read");
+    }
     HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
         F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex));
     });
@@ -106,9 +108,6 @@ void TranslatorVisitor::AST(u64 insn) {
         BitField<47, 2, Size> size;
     } const ast{insn};
 
-    if (ast.patch != 0) {
-        throw NotImplementedException("P");
-    }
     if (ast.index_reg != IR::Reg::RZ) {
         throw NotImplementedException("Indexed store");
     }
@@ -120,11 +119,19 @@ void TranslatorVisitor::AST(u64 insn) {
     const u32 num_elements{NumElements(ast.size)};
     if (ast.index_reg == IR::Reg::RZ) {
         for (u32 element = 0; element < num_elements; ++element) {
-            const IR::Attribute attr{offset / 4 + element};
-            ir.SetAttribute(attr, F(ast.src_reg + element), vertex);
+            if (ast.patch != 0) {
+                const IR::Patch patch{offset / 4 + element};
+                ir.SetPatch(patch, F(ast.src_reg + element));
+            } else {
+                const IR::Attribute attr{offset / 4 + element};
+                ir.SetAttribute(attr, F(ast.src_reg + element), vertex);
+            }
         }
         return;
     }
+    if (ast.patch != 0) {
+        throw NotImplementedException("Indexed tessellation patch store");
+    }
     HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
         ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex);
     });
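
With the P bit set, the 4-byte-aligned immediate offset now addresses patch memory directly: each 32-bit word maps to one Patch enumerator, starting at TessellationLodLeft. A worked decode under the layout declared in patch.h:

    // offset 0x00 -> word 0 -> Patch::TessellationLodLeft
    // offset 0x14 -> word 5 -> Patch::TessellationLodInteriorV
    // offset 0x20 -> word 8 -> Patch::Component0 (first generic component)
    const u64 offset{0x20};
    const u32 element{1};
    const Shader::IR::Patch patch{offset / 4 + element}; // Patch::Component1
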
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
index bc822d585e..660b84c20b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -113,6 +113,8 @@ enum class SpecialRegister : u64 {
 
 [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
     switch (special_register) {
+    case SpecialRegister::SR_INVOCATION_ID:
+        return ir.InvocationId();
     case SpecialRegister::SR_THREAD_KILL:
         return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
     case SpecialRegister::SR_INVOCATION_INFO:
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 617ec05cee..aadcf7999c 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -53,6 +53,10 @@ void GetAttribute(Info& info, IR::Attribute attribute) {
     case IR::Attribute::PointSpriteT:
         info.loads_point_coord = true;
         break;
+    case IR::Attribute::TessellationEvaluationPointU:
+    case IR::Attribute::TessellationEvaluationPointV:
+        info.loads_tess_coord = true;
+        break;
     default:
         throw NotImplementedException("Get attribute {}", attribute);
     }
@@ -94,6 +98,34 @@ void SetAttribute(Info& info, IR::Attribute attribute) {
     }
 }
 
+void GetPatch(Info& info, IR::Patch patch) {
+    if (!IR::IsGeneric(patch)) {
+        throw NotImplementedException("Reading non-generic patch {}", patch);
+    }
+    info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
+}
+
+void SetPatch(Info& info, IR::Patch patch) {
+    if (IR::IsGeneric(patch)) {
+        info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
+        return;
+    }
+    switch (patch) {
+    case IR::Patch::TessellationLodLeft:
+    case IR::Patch::TessellationLodTop:
+    case IR::Patch::TessellationLodRight:
+    case IR::Patch::TessellationLodBottom:
+        info.stores_tess_level_outer = true;
+        break;
+    case IR::Patch::TessellationLodInteriorU:
+    case IR::Patch::TessellationLodInteriorV:
+        info.stores_tess_level_inner = true;
+        break;
+    default:
+        throw NotImplementedException("Set patch {}", patch);
+    }
+}
+
 void VisitUsages(Info& info, IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::CompositeConstructF16x2:
@@ -350,6 +382,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::SetAttribute:
         SetAttribute(info, inst.Arg(0).Attribute());
         break;
+    case IR::Opcode::GetPatch:
+        GetPatch(info, inst.Arg(0).Patch());
+        break;
+    case IR::Opcode::SetPatch:
+        SetPatch(info, inst.Arg(0).Patch());
+        break;
     case IR::Opcode::GetAttributeIndexed:
         info.loads_indexed_attributes = true;
         break;
@@ -368,6 +406,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::LocalInvocationId:
         info.uses_local_invocation_id = true;
         break;
+    case IR::Opcode::InvocationId:
+        info.uses_invocation_id = true;
+        break;
     case IR::Opcode::IsHelperInvocation:
         info.uses_is_helper_invocation = true;
         break;
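
GetPatch only accepts generic patches (reading the fixed tessellation levels is left unimplemented), while SetPatch additionally folds the six LOD patches into the outer/inner tessellation level flags. A hypothetical consumer of the collected flags; the function name is illustrative, not part of this patch:

    #include <cstddef>
    #include "shader_recompiler/shader_info.h"

    void DeclarePatchOutputs(const Shader::Info& info) {
        for (std::size_t index = 0; index < info.uses_patches.size(); ++index) {
            if (info.uses_patches[index]) {
                // Declare one four-component patch location for this slot.
            }
        }
        if (info.stores_tess_level_outer) {
            // Declare the TessLevelOuter built-in output.
        }
        if (info.stores_tess_level_inner) {
            // Declare the TessLevelInner built-in output.
        }
    }
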
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index c26017d75f..3a04f075ee 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -38,6 +38,18 @@ enum class CompareFunction {
     Always,
 };
 
+enum class TessPrimitive {
+    Isolines,
+    Triangles,
+    Quads,
+};
+
+enum class TessSpacing {
+    Equal,
+    FractionalOdd,
+    FractionalEven,
+};
+
 struct TransformFeedbackVarying {
     u32 buffer{};
     u32 stride{};
@@ -74,6 +86,10 @@ struct Profile {
     bool convert_depth_mode{};
     bool force_early_z{};
 
+    TessPrimitive tess_primitive{};
+    TessSpacing tess_spacing{};
+    bool tess_clockwise{};
+
     InputTopology input_topology{};
 
     std::optional<float> fixed_state_point_size;
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 336c6131ab..4dbf9ed12a 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -101,8 +101,10 @@ struct Info {
 
     bool uses_workgroup_id{};
     bool uses_local_invocation_id{};
+    bool uses_invocation_id{};
     bool uses_is_helper_invocation{};
     bool uses_subgroup_invocation_id{};
+    std::array<bool, 30> uses_patches{};
 
     std::array<InputVarying, 32> input_generics{};
     bool loads_position{};
@@ -110,6 +112,7 @@ struct Info {
     bool loads_vertex_id{};
     bool loads_front_face{};
     bool loads_point_coord{};
+    bool loads_tess_coord{};
     bool loads_indexed_attributes{};
 
     std::array<bool, 8> stores_frag_color{};
@@ -120,6 +123,8 @@ struct Info {
     bool stores_clip_distance{};
     bool stores_layer{};
     bool stores_viewport_index{};
+    bool stores_tess_level_outer{};
+    bool stores_tess_level_inner{};
     bool stores_indexed_attributes{};
 
     bool uses_fp16{};
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index dc4ff0da2b..8f0b0b8ecc 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -685,6 +685,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) {
     return {};
 }
 
+VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) {
+    switch (polygon_mode) {
+    case Maxwell::PolygonMode::Point:
+        return VK_POLYGON_MODE_POINT;
+    case Maxwell::PolygonMode::Line:
+        return VK_POLYGON_MODE_LINE;
+    case Maxwell::PolygonMode::Fill:
+        return VK_POLYGON_MODE_FILL;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode);
+    return {};
+}
+
 VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
     switch (swizzle) {
     case Tegra::Texture::SwizzleSource::Zero:
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 9f78e15b6a..50a599c116 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -65,6 +65,8 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face);
 
 VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
 
+VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
+
 VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
 
 VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 84720a6f92..d5e9dae0f7 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -355,7 +355,8 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
             static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
         .rasterizerDiscardEnable =
             static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
-        .polygonMode = VK_POLYGON_MODE_FILL,
+        .polygonMode =
+            MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)),
         .cullMode = static_cast<VkCullModeFlags>(
             dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
         .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index ee22255bfa..0bccc640ac 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -1040,6 +1040,36 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
         std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(),
                                &CastAttributeType);
         break;
+    case Shader::Stage::TessellationEval:
+        // Winding is flipped relative to the guest's clockwise bit; the exact reason is unknown.
+        profile.tess_clockwise = key.state.tessellation_clockwise == 0;
+        profile.tess_primitive = [&key] {
+            const u32 raw{key.state.tessellation_primitive.Value()};
+            switch (static_cast<Maxwell::TessellationPrimitive>(raw)) {
+            case Maxwell::TessellationPrimitive::Isolines:
+                return Shader::TessPrimitive::Isolines;
+            case Maxwell::TessellationPrimitive::Triangles:
+                return Shader::TessPrimitive::Triangles;
+            case Maxwell::TessellationPrimitive::Quads:
+                return Shader::TessPrimitive::Quads;
+            }
+            UNREACHABLE();
+            return Shader::TessPrimitive::Triangles;
+        }();
+        profile.tess_spacing = [&] {
+            const u32 raw{key.state.tessellation_spacing};
+            switch (static_cast<Maxwell::TessellationSpacing>(raw)) {
+            case Maxwell::TessellationSpacing::Equal:
+                return Shader::TessSpacing::Equal;
+            case Maxwell::TessellationSpacing::FractionalOdd:
+                return Shader::TessSpacing::FractionalOdd;
+            case Maxwell::TessellationSpacing::FractionalEven:
+                return Shader::TessSpacing::FractionalEven;
+            }
+            UNREACHABLE();
+            return Shader::TessSpacing::Equal;
+        }();
+        break;
     case Shader::Stage::Geometry:
         if (program.output_topology == Shader::OutputTopology::PointList) {
             profile.fixed_state_point_size = point_size;
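
A SPIR-V backend can then translate the captured state into execution modes on the tessellation evaluation entry point. A hypothetical mapping sketch (this patch only plumbs the state through; the spv enumerators follow the standard SPIR-V headers):

    spv::ExecutionMode SpacingMode(Shader::TessSpacing spacing) {
        switch (spacing) {
        case Shader::TessSpacing::Equal:
            return spv::ExecutionMode::SpacingEqual;
        case Shader::TessSpacing::FractionalOdd:
            return spv::ExecutionMode::SpacingFractionalOdd;
        case Shader::TessSpacing::FractionalEven:
            return spv::ExecutionMode::SpacingFractionalEven;
        }
        return spv::ExecutionMode::SpacingEqual;
    }
    // Primitive mode maps the same way (Isolines/Triangles/Quads), and
    // tess_clockwise selects VertexOrderCw over VertexOrderCcw.
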
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 0412b52343..555b12ed72 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
         .flags = 0,
         .size = STREAM_BUFFER_SIZE,
         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
-                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 87cfe6312d..f0de19ba11 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -225,7 +225,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
             .drawIndirectFirstInstance = false,
             .depthClamp = true,
             .depthBiasClamp = true,
-            .fillModeNonSolid = false,
+            .fillModeNonSolid = true,
             .depthBounds = false,
             .wideLines = false,
             .largePoints = true,
@@ -670,6 +670,7 @@ void Device::CheckSuitability(bool requires_swapchain) const {
         std::make_pair(features.largePoints, "largePoints"),
         std::make_pair(features.multiViewport, "multiViewport"),
         std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
+        std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"),
         std::make_pair(features.geometryShader, "geometryShader"),
         std::make_pair(features.tessellationShader, "tessellationShader"),
         std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
-- 
cgit v1.2.3-70-g09d2


From 80940b17069f6baa733a9b572445b27bc7509137 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 16 Apr 2021 18:47:26 -0300
Subject: shader: Implement SampleMask

---
 src/shader_recompiler/backend/spirv/emit_context.cpp                 | 5 ++++-
 src/shader_recompiler/backend/spirv/emit_context.h                   | 1 +
 src/shader_recompiler/backend/spirv/emit_spirv.h                     | 1 +
 src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp   | 4 ++++
 src/shader_recompiler/frontend/ir/ir_emitter.cpp                     | 4 ++++
 src/shader_recompiler/frontend/ir/ir_emitter.h                       | 1 +
 src/shader_recompiler/frontend/ir/microinstruction.cpp               | 1 +
 src/shader_recompiler/frontend/ir/opcodes.inc                        | 1 +
 .../frontend/maxwell/translate/impl/exit_program.cpp                 | 2 +-
 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp            | 3 +++
 src/shader_recompiler/shader_info.h                                  | 1 +
 11 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 0b4abeb441..b9e6d56557 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -1179,7 +1179,10 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
         if (info.stores_frag_depth) {
             frag_depth = DefineOutput(*this, F32[1], std::nullopt);
             Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
-            Name(frag_depth, "frag_depth");
+        }
+        if (info.stores_sample_mask) {
+            sample_mask = DefineOutput(*this, U32[1], std::nullopt);
+            Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
         }
         break;
     default:
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 9d83403336..528dc33feb 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -215,6 +215,7 @@ public:
     std::array<Id, 30> patches{};
 
     std::array<Id, 8> frag_color{};
+    Id sample_mask{};
     Id frag_depth{};
 
     std::vector<Id> interfaces;
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index dfddf5e582..9f658a4bd0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -58,6 +58,7 @@ void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
 Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
 void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
 void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetSampleMask(EmitContext& ctx, Id value);
 void EmitSetFragDepth(EmitContext& ctx, Id value);
 void EmitGetZFlag(EmitContext& ctx);
 void EmitGetSFlag(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 7555dd94c3..e5e4c352b8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -343,6 +343,10 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
     ctx.OpStore(pointer, value);
 }
 
+void EmitSetSampleMask(EmitContext& ctx, Id value) {
+    ctx.OpStore(ctx.sample_mask, value);
+}
+
 void EmitSetFragDepth(EmitContext& ctx, Id value) {
     ctx.OpStore(ctx.frag_depth, value);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 141efd86cb..ef3b00bc27 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -343,6 +343,10 @@ void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
     Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
 }
 
+void IREmitter::SetSampleMask(const U32& value) {
+    Inst(Opcode::SetSampleMask, value);
+}
+
 void IREmitter::SetFragDepth(const F32& value) {
     Inst(Opcode::SetFragDepth, value);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 81833d928d..1a585df154 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -88,6 +88,7 @@ public:
     void SetPatch(Patch patch, const F32& value);
 
     void SetFragColor(u32 index, u32 component, const F32& value);
+    void SetSampleMask(const U32& value);
     void SetFragDepth(const F32& value);
 
     [[nodiscard]] U32 WorkgroupIdX();
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index b2d7573d99..b53fe2e2a9 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -75,6 +75,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::SetAttributeIndexed:
     case Opcode::SetPatch:
     case Opcode::SetFragColor:
+    case Opcode::SetSampleMask:
     case Opcode::SetFragDepth:
     case Opcode::WriteGlobalU8:
     case Opcode::WriteGlobalS8:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index d5e443673e..0748efa8df 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -51,6 +51,7 @@ OPCODE(SetAttributeIndexed,                                 Void,           U32,
 OPCODE(GetPatch,                                            F32,            Patch,                                                                          )
 OPCODE(SetPatch,                                            Void,           Patch,          F32,                                                            )
 OPCODE(SetFragColor,                                        Void,           U32,            U32,            F32,                                            )
+OPCODE(SetSampleMask,                                       Void,           U32,                                                                            )
 OPCODE(SetFragDepth,                                        Void,           F32,                                                                            )
 OPCODE(GetZFlag,                                            U1,             Void,                                                                           )
 OPCODE(GetSFlag,                                            U1,             Void,                                                                           )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
index 58a53c0ec9..c2443c886f 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -22,7 +22,7 @@ void ExitFragment(TranslatorVisitor& v) {
         }
     }
     if (sph.ps.omap.sample_mask != 0) {
-        throw NotImplementedException("Sample mask");
+        v.ir.SetSampleMask(v.X(src_reg));
     }
     if (sph.ps.omap.depth != 0) {
         v.ir.SetFragDepth(v.F(src_reg + 1));
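
For reference, the output register layout this decodes at fragment-shader exit (as read from the code above, with src_reg already advanced past the enabled color outputs):

    // src_reg + 0 : sample mask (u32)    when sph.ps.omap.sample_mask != 0
    // src_reg + 1 : fragment depth (f32) when sph.ps.omap.depth != 0
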
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 5d13104664..60b7d3a366 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -403,6 +403,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::SetFragColor:
         info.stores_frag_color[inst.Arg(0).U32()] = true;
         break;
+    case IR::Opcode::SetSampleMask:
+        info.stores_sample_mask = true;
+        break;
     case IR::Opcode::SetFragDepth:
         info.stores_frag_depth = true;
         break;
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 686f5c7191..0a89319306 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -118,6 +118,7 @@ struct Info {
     bool loads_indexed_attributes{};
 
     std::array<bool, 8> stores_frag_color{};
+    bool stores_sample_mask{};
     bool stores_frag_depth{};
     std::array<bool, 32> stores_generics{};
     bool stores_position{};
-- 
cgit v1.2.3-70-g09d2


From 0a0818c0259b4f90f1f7bb37fcffbc1f194ca4d0 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 17 Apr 2021 03:21:03 -0300
Subject: shader: Fix memory barriers

---
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  5 ++--
 .../backend/spirv/emit_spirv_barriers.cpp          | 14 ++++------
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 20 +++++----------
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  5 ++--
 .../frontend/ir/microinstruction.cpp               |  5 ++--
 src/shader_recompiler/frontend/ir/modifiers.h      |  8 ------
 src/shader_recompiler/frontend/ir/opcodes.inc      |  5 ++--
 .../maxwell/translate/impl/barrier_operations.cpp  | 30 ++++++++--------------
 8 files changed, 30 insertions(+), 62 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index cf8d74f4ed..d43c72f6e0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -29,9 +29,8 @@ void EmitReturn(EmitContext& ctx);
 void EmitUnreachable(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
 void EmitBarrier(EmitContext& ctx);
-void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx);
-void EmitMemoryBarrierDeviceLevel(EmitContext& ctx);
-void EmitMemoryBarrierSystemLevel(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
 void EmitPrologue(EmitContext& ctx);
 void EmitEpilogue(EmitContext& ctx);
 void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
index 74f523d0ff..366dc6a0c8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
@@ -7,7 +7,7 @@
 
 namespace Shader::Backend::SPIRV {
 namespace {
-void EmitMemoryBarrierImpl(EmitContext& ctx, spv::Scope scope) {
+void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
     const auto semantics{
         spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
         spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
@@ -27,16 +27,12 @@ void EmitBarrier(EmitContext& ctx) {
                          ctx.Constant(ctx.U32[1], static_cast<u32>(memory_semantics)));
 }
 
-void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx) {
-    EmitMemoryBarrierImpl(ctx, spv::Scope::Workgroup);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+    MemoryBarrier(ctx, spv::Scope::Workgroup);
 }
 
-void EmitMemoryBarrierDeviceLevel(EmitContext& ctx) {
-    EmitMemoryBarrierImpl(ctx, spv::Scope::Device);
-}
-
-void EmitMemoryBarrierSystemLevel(EmitContext& ctx) {
-    EmitMemoryBarrierImpl(ctx, spv::Scope::CrossDevice);
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+    MemoryBarrier(ctx, spv::Scope::Device);
 }
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index ef3b00bc27..aebe7200f0 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -86,20 +86,12 @@ void IREmitter::Barrier() {
     Inst(Opcode::Barrier);
 }
 
-void IREmitter::MemoryBarrier(MemoryScope scope) {
-    switch (scope) {
-    case MemoryScope::Workgroup:
-        Inst(Opcode::MemoryBarrierWorkgroupLevel);
-        break;
-    case MemoryScope::Device:
-        Inst(Opcode::MemoryBarrierDeviceLevel);
-        break;
-    case MemoryScope::System:
-        Inst(Opcode::MemoryBarrierSystemLevel);
-        break;
-    default:
-        throw InvalidArgument("Invalid memory scope {}", scope);
-    }
+void IREmitter::WorkgroupMemoryBarrier() {
+    Inst(Opcode::WorkgroupMemoryBarrier);
+}
+
+void IREmitter::DeviceMemoryBarrier() {
+    Inst(Opcode::DeviceMemoryBarrier);
 }
 
 void IREmitter::Return() {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 1a585df154..b9d051b43d 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -144,8 +144,9 @@ public:
     [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
                                const Value& false_value);
 
-    [[nodiscard]] void Barrier();
-    [[nodiscard]] void MemoryBarrier(MemoryScope scope);
+    void Barrier();
+    void WorkgroupMemoryBarrier();
+    void DeviceMemoryBarrier();
 
     template <typename Dest, typename Source>
     [[nodiscard]] Dest BitCast(const Source& value);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index b53fe2e2a9..efa4268089 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -64,9 +64,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::Unreachable:
     case Opcode::DemoteToHelperInvocation:
     case Opcode::Barrier:
-    case Opcode::MemoryBarrierWorkgroupLevel:
-    case Opcode::MemoryBarrierDeviceLevel:
-    case Opcode::MemoryBarrierSystemLevel:
+    case Opcode::WorkgroupMemoryBarrier:
+    case Opcode::DeviceMemoryBarrier:
     case Opcode::Prologue:
     case Opcode::Epilogue:
     case Opcode::EmitVertex:
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 447e9703ca..5d7efa14cc 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -25,14 +25,6 @@ enum class FpRounding : u8 {
     RZ,       // Round towards zero
 };
 
-enum class MemoryScope : u32 {
-    DontCare,
-    Warp,
-    Workgroup,
-    Device,
-    System,
-};
-
 struct FpControl {
     bool no_contraction{false};
     FpRounding rounding{FpRounding::DontCare};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 0748efa8df..1cfc2a9438 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -18,9 +18,8 @@ OPCODE(DemoteToHelperInvocation,                            Void,           Labe
 
 // Barriers
 OPCODE(Barrier,                                             Void,                                                                                           )
-OPCODE(MemoryBarrierWorkgroupLevel,                         Void,                                                                                           )
-OPCODE(MemoryBarrierDeviceLevel,                            Void,                                                                                           )
-OPCODE(MemoryBarrierSystemLevel,                            Void,                                                                                           )
+OPCODE(WorkgroupMemoryBarrier,                              Void,                                                                                           )
+OPCODE(DeviceMemoryBarrier,                                 Void,                                                                                           )
 
 // Special operations
 OPCODE(Prologue,                                            Void,                                                                                           )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
index 2a2a294dfa..86e433e415 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -12,34 +12,24 @@ namespace Shader::Maxwell {
 namespace {
 // Seems to be in CUDA terminology.
 enum class LocalScope : u64 {
-    CTG = 0,
-    GL = 1,
-    SYS = 2,
-    VC = 3,
+    CTA,
+    GL,
+    SYS,
+    VC,
 };
-
-IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) {
-    switch (scope) {
-    case LocalScope::CTG:
-        return IR::MemoryScope::Workgroup;
-    case LocalScope::GL:
-        return IR::MemoryScope::Device;
-    case LocalScope::SYS:
-        return IR::MemoryScope::System;
-    default:
-        throw NotImplementedException("Unimplemented Local Scope {}", scope);
-    }
-}
-
 } // Anonymous namespace
 
 void TranslatorVisitor::MEMBAR(u64 inst) {
     union {
         u64 raw;
         BitField<8, 2, LocalScope> scope;
-    } membar{inst};
+    } const membar{inst};
 
-    ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope));
+    if (membar.scope == LocalScope::CTA) {
+        ir.WorkgroupMemoryBarrier();
+    } else {
+        ir.DeviceMemoryBarrier();
+    }
 }
 
 void TranslatorVisitor::DEPBAR() {
-- 
cgit v1.2.3-70-g09d2
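
The system scope disappears because Vulkan does not allow CrossDevice as a memory scope, so SYS and VC barriers are lowered conservatively to device scope. Per guest scope, the emitted SPIR-V becomes (sketch; semantics is the AcquireRelease mask built in emit_spirv_barriers.cpp):

    // MEMBAR.CTA -> OpMemoryBarrier(Workgroup, semantics)
    // MEMBAR.GL  -> OpMemoryBarrier(Device,    semantics)
    // MEMBAR.SYS -> OpMemoryBarrier(Device,    semantics)
    // MEMBAR.VC  -> OpMemoryBarrier(Device,    semantics)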


From 50f8007172ce143a632270510f96093c82018952 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 17 Apr 2021 16:40:35 -0300
Subject: shader: Fix Phi node types

---
 src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ----
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp      | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index efa4268089..7555ac00a1 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -275,10 +275,6 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
     if (!value.IsImmediate()) {
         Use(value);
     }
-    if (Flags<IR::Type>() == IR::Type::Void) {
-        // Set the type of the phi node
-        SetFlags<IR::Type>(value.Type());
-    }
     phi_args.emplace_back(predecessor, value);
 }
 
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index 346fcc3774..ddd679e393 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -176,6 +176,8 @@ public:
                 } else if (!sealed_blocks.contains(block)) {
                     // Incomplete CFG
                     IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+                    phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
+
                     incomplete_phis[block].insert_or_assign(variable, phi);
                     stack.back().result = IR::Value{&*phi};
                 } else if (const std::span imm_preds{block->ImmediatePredecessors()};
@@ -187,6 +189,8 @@ public:
                 } else {
                     // Break potential cycles with operandless phi
                     IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+                    phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
+
                     WriteVariable(variable, block, IR::Value{phi});
 
                     stack.back().phi = phi;
-- 
cgit v1.2.3-70-g09d2
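
Since AddPhiOperand no longer types phi nodes lazily, the SSA pass assigns a concrete type at creation, which also covers the operandless phis used for incomplete CFGs and cycle breaking. A sketch of the typing chain, assuming the pass's usual UndefOpcode mapping (UndefU32 for registers, UndefU1 for predicates):

    // variable : IR::Reg  -> UndefOpcode -> Opcode::UndefU32 -> Type::U32
    // variable : IR::Pred -> UndefOpcode -> Opcode::UndefU1  -> Type::U1
    IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
    phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); // never left as Type::Void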


From 6944cabb899c4367a63cde97ae2bc2eb1a0fb790 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 21 Apr 2021 00:25:46 -0300
Subject: shader: Inline common Opcode and Inst functions

---
 .../frontend/ir/microinstruction.cpp               | 18 -----
 .../frontend/ir/microinstruction.h                 | 13 +++-
 src/shader_recompiler/frontend/ir/opcodes.cpp      | 90 +---------------------
 src/shader_recompiler/frontend/ir/opcodes.h        | 74 +++++++++++++++++-
 4 files changed, 83 insertions(+), 112 deletions(-)

diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 7555ac00a1..41f9fa0cd8 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -221,28 +221,10 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
     }
 }
 
-size_t Inst::NumArgs() const {
-    return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op);
-}
-
 IR::Type Inst::Type() const {
     return TypeOf(op);
 }
 
-Value Inst::Arg(size_t index) const {
-    if (op == Opcode::Phi) {
-        if (index >= phi_args.size()) {
-            throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
-        }
-        return phi_args[index].second;
-    } else {
-        if (index >= NumArgsOf(op)) {
-            throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
-        }
-        return args[index];
-    }
-}
-
 void Inst::SetArg(size_t index, Value value) {
     if (index >= NumArgs()) {
         throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index dc9f683fe5..ea55fc29cc 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -73,10 +73,19 @@ public:
     [[nodiscard]] IR::Type Type() const;
 
     /// Get the number of arguments this instruction has.
-    [[nodiscard]] size_t NumArgs() const;
+    [[nodiscard]] size_t NumArgs() const {
+        return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op);
+    }
 
     /// Get the value of a given argument index.
-    [[nodiscard]] Value Arg(size_t index) const;
+    [[nodiscard]] Value Arg(size_t index) const noexcept {
+        if (op == Opcode::Phi) {
+            return phi_args[index].second;
+        } else {
+            return args[index];
+        }
+    }
+
     /// Set the value of a given argument index.
     void SetArg(size_t index, Value value);
 
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
index 4207d548c5..24d024ad7c 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.cpp
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -2,102 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <algorithm>
-#include <array>
 #include <string_view>
 
-#include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/opcodes.h"
 
 namespace Shader::IR {
-namespace {
-struct OpcodeMeta {
-    std::string_view name;
-    Type type;
-    std::array<Type, 5> arg_types;
-};
-
-// using enum Type;
-constexpr Type Void{Type::Void};
-constexpr Type Opaque{Type::Opaque};
-constexpr Type Label{Type::Label};
-constexpr Type Reg{Type::Reg};
-constexpr Type Pred{Type::Pred};
-constexpr Type Attribute{Type::Attribute};
-constexpr Type Patch{Type::Patch};
-constexpr Type U1{Type::U1};
-constexpr Type U8{Type::U8};
-constexpr Type U16{Type::U16};
-constexpr Type U32{Type::U32};
-constexpr Type U64{Type::U64};
-constexpr Type F16{Type::F16};
-constexpr Type F32{Type::F32};
-constexpr Type F64{Type::F64};
-constexpr Type U32x2{Type::U32x2};
-constexpr Type U32x3{Type::U32x3};
-constexpr Type U32x4{Type::U32x4};
-constexpr Type F16x2{Type::F16x2};
-constexpr Type F16x3{Type::F16x3};
-constexpr Type F16x4{Type::F16x4};
-constexpr Type F32x2{Type::F32x2};
-constexpr Type F32x3{Type::F32x3};
-constexpr Type F32x4{Type::F32x4};
-constexpr Type F64x2{Type::F64x2};
-constexpr Type F64x3{Type::F64x3};
-constexpr Type F64x4{Type::F64x4};
-
-constexpr std::array META_TABLE{
-#define OPCODE(name_token, type_token, ...)                                                        \
-    OpcodeMeta{                                                                                    \
-        .name{#name_token},                                                                        \
-        .type = type_token,                                                                        \
-        .arg_types{__VA_ARGS__},                                                                   \
-    },
-#include "opcodes.inc"
-#undef OPCODE
-};
-
-constexpr size_t CalculateNumArgsOf(Opcode op) {
-    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
-    return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void));
-}
-
-constexpr std::array NUM_ARGS{
-#define OPCODE(name_token, type_token, ...) CalculateNumArgsOf(Opcode::name_token),
-#include "opcodes.inc"
-#undef OPCODE
-};
-
-void ValidateOpcode(Opcode op) {
-    const size_t raw{static_cast<size_t>(op)};
-    if (raw >= META_TABLE.size()) {
-        throw InvalidArgument("Invalid opcode with raw value {}", raw);
-    }
-}
-} // Anonymous namespace
-
-Type TypeOf(Opcode op) {
-    ValidateOpcode(op);
-    return META_TABLE[static_cast<size_t>(op)].type;
-}
-
-size_t NumArgsOf(Opcode op) {
-    ValidateOpcode(op);
-    return NUM_ARGS[static_cast<size_t>(op)];
-}
-
-Type ArgTypeOf(Opcode op, size_t arg_index) {
-    ValidateOpcode(op);
-    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
-    if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) {
-        throw InvalidArgument("Out of bounds argument");
-    }
-    return arg_types[arg_index];
-}
 
 std::string_view NameOf(Opcode op) {
-    ValidateOpcode(op);
-    return META_TABLE[static_cast<size_t>(op)].name;
+    return Detail::META_TABLE[static_cast<size_t>(op)].name;
 }
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
index 999fb2e775..b5697c7f97 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.h
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include <algorithm>
+#include <array>
 #include <string_view>
 
 #include <fmt/format.h>
@@ -18,14 +20,80 @@ enum class Opcode {
 #undef OPCODE
 };
 
+namespace Detail {
+
+struct OpcodeMeta {
+    std::string_view name;
+    Type type;
+    std::array<Type, 5> arg_types;
+};
+
+// Replace these aliases with "using enum Type;" once C++20 using-enum is available everywhere.
+constexpr Type Void{Type::Void};
+constexpr Type Opaque{Type::Opaque};
+constexpr Type Label{Type::Label};
+constexpr Type Reg{Type::Reg};
+constexpr Type Pred{Type::Pred};
+constexpr Type Attribute{Type::Attribute};
+constexpr Type Patch{Type::Patch};
+constexpr Type U1{Type::U1};
+constexpr Type U8{Type::U8};
+constexpr Type U16{Type::U16};
+constexpr Type U32{Type::U32};
+constexpr Type U64{Type::U64};
+constexpr Type F16{Type::F16};
+constexpr Type F32{Type::F32};
+constexpr Type F64{Type::F64};
+constexpr Type U32x2{Type::U32x2};
+constexpr Type U32x3{Type::U32x3};
+constexpr Type U32x4{Type::U32x4};
+constexpr Type F16x2{Type::F16x2};
+constexpr Type F16x3{Type::F16x3};
+constexpr Type F16x4{Type::F16x4};
+constexpr Type F32x2{Type::F32x2};
+constexpr Type F32x3{Type::F32x3};
+constexpr Type F32x4{Type::F32x4};
+constexpr Type F64x2{Type::F64x2};
+constexpr Type F64x3{Type::F64x3};
+constexpr Type F64x4{Type::F64x4};
+
+constexpr std::array META_TABLE{
+#define OPCODE(name_token, type_token, ...)                                                        \
+    OpcodeMeta{                                                                                    \
+        .name{#name_token},                                                                        \
+        .type = type_token,                                                                        \
+        .arg_types{__VA_ARGS__},                                                                   \
+    },
+#include "opcodes.inc"
+#undef OPCODE
+};
+
+constexpr size_t CalculateNumArgsOf(Opcode op) {
+    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
+    return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void));
+}
+
+constexpr std::array NUM_ARGS{
+#define OPCODE(name_token, type_token, ...) CalculateNumArgsOf(Opcode::name_token),
+#include "opcodes.inc"
+#undef OPCODE
+};
+} // namespace Detail
+
 /// Get return type of an opcode
-[[nodiscard]] Type TypeOf(Opcode op);
+[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
+    return Detail::META_TABLE[static_cast<size_t>(op)].type;
+}
 
 /// Get the number of arguments an opcode accepts
-[[nodiscard]] size_t NumArgsOf(Opcode op);
+[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
+    return Detail::NUM_ARGS[static_cast<size_t>(op)];
+}
 
 /// Get the required type of an argument of an opcode
-[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index);
+[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
+    return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
+}
 
 /// Get the name of an opcode
 [[nodiscard]] std::string_view NameOf(Opcode op);
-- 
cgit v1.2.3-70-g09d2
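
Moving the tables into the header lets these queries inline into callers; since the throwing validation is gone, out-of-range lookups become the caller's responsibility. A small usage sketch against opcodes declared earlier in this series:

    using namespace Shader::IR;
    const Type ret{TypeOf(Opcode::GetPatch)};           // Type::F32
    const size_t num_args{NumArgsOf(Opcode::SetPatch)}; // 2: Patch, F32
    const Type arg0{ArgTypeOf(Opcode::SetPatch, 0)};    // Type::Patch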


From 050e81500c002f304d581f28700de549b828a2bc Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 21 Apr 2021 00:35:47 -0300
Subject: shader: Move microinstruction header to the value header

---
 src/shader_recompiler/CMakeLists.txt               |   1 -
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |   1 -
 src/shader_recompiler/backend/spirv/emit_spirv.h   |   1 -
 src/shader_recompiler/frontend/ir/basic_block.h    |   1 -
 .../frontend/ir/breadth_first_search.h             |   1 -
 .../frontend/ir/microinstruction.cpp               |   2 +-
 .../frontend/ir/microinstruction.h                 | 162 ---------------------
 src/shader_recompiler/frontend/ir/program.cpp      |   2 +-
 src/shader_recompiler/frontend/ir/value.cpp        |   1 -
 src/shader_recompiler/frontend/ir/value.h          | 151 ++++++++++++++++++-
 .../frontend/maxwell/structured_control_flow.h     |   2 +-
 .../ir_opt/collect_shader_info_pass.cpp            |   2 +-
 .../ir_opt/constant_propagation_pass.cpp           |   2 +-
 .../ir_opt/dead_code_elimination_pass.cpp          |   2 +-
 .../global_memory_to_storage_buffer_pass.cpp       |   2 +-
 .../ir_opt/identity_removal_pass.cpp               |   2 +-
 .../ir_opt/lower_fp16_to_fp32.cpp                  |   2 +-
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |   2 +-
 src/shader_recompiler/ir_opt/verification_pass.cpp |   2 +-
 src/video_core/renderer_vulkan/vk_pipeline_cache.h |   2 +-
 20 files changed, 162 insertions(+), 181 deletions(-)
 delete mode 100644 src/shader_recompiler/frontend/ir/microinstruction.h

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 07963a7607..3d7506de2d 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -36,7 +36,6 @@ add_library(shader_recompiler STATIC
     frontend/ir/ir_emitter.cpp
     frontend/ir/ir_emitter.h
     frontend/ir/microinstruction.cpp
-    frontend/ir/microinstruction.h
     frontend/ir/modifiers.h
     frontend/ir/opcodes.cpp
     frontend/ir/opcodes.h
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 5d6fdeb65a..815b3cd957 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -10,7 +10,6 @@
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/program.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 89a82e8581..4562db45b4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -8,7 +8,6 @@
 
 #include "common/common_types.h"
 #include "shader_recompiler/backend/spirv/emit_context.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/profile.h"
 
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index 3a42307557..ab7ddb3d52 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -13,7 +13,6 @@
 
 #include "common/bit_cast.h"
 #include "shader_recompiler/frontend/ir/condition.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/object_pool.h"
 
diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h
index b35f062d43..a52ccbd58b 100644
--- a/src/shader_recompiler/frontend/ir/breadth_first_search.h
+++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h
@@ -10,7 +10,6 @@
 
 #include <boost/container/small_vector.hpp>
 
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 41f9fa0cd8..701746a0cc 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -6,8 +6,8 @@
 #include <memory>
 
 #include "shader_recompiler/exception.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
 namespace {
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
deleted file mode 100644
index ea55fc29cc..0000000000
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstring>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include <boost/container/small_vector.hpp>
-#include <boost/intrusive/list.hpp>
-
-#include "common/bit_cast.h"
-#include "common/common_types.h"
-#include "shader_recompiler/frontend/ir/opcodes.h"
-#include "shader_recompiler/frontend/ir/type.h"
-#include "shader_recompiler/frontend/ir/value.h"
-
-namespace Shader::IR {
-
-class Block;
-
-struct AssociatedInsts;
-
-class Inst : public boost::intrusive::list_base_hook<> {
-public:
-    explicit Inst(Opcode op_, u32 flags_) noexcept;
-    ~Inst();
-
-    Inst& operator=(const Inst&) = delete;
-    Inst(const Inst&) = delete;
-
-    Inst& operator=(Inst&&) = delete;
-    Inst(Inst&&) = delete;
-
-    /// Get the number of uses this instruction has.
-    [[nodiscard]] int UseCount() const noexcept {
-        return use_count;
-    }
-
-    /// Determines whether this instruction has uses or not.
-    [[nodiscard]] bool HasUses() const noexcept {
-        return use_count > 0;
-    }
-
-    /// Get the opcode this microinstruction represents.
-    [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
-        return op;
-    }
-
-    /// Determines if there is a pseudo-operation associated with this instruction.
-    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
-        return associated_insts != nullptr;
-    }
-
-    /// Determines whether or not this instruction may have side effects.
-    [[nodiscard]] bool MayHaveSideEffects() const noexcept;
-
-    /// Determines whether or not this instruction is a pseudo-instruction.
-    /// Pseudo-instructions depend on their parent instructions for their semantics.
-    [[nodiscard]] bool IsPseudoInstruction() const noexcept;
-
-    /// Determines if all arguments of this instruction are immediates.
-    [[nodiscard]] bool AreAllArgsImmediates() const;
-
-    /// Gets a pseudo-operation associated with this instruction
-    [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
-
-    /// Get the type this instruction returns.
-    [[nodiscard]] IR::Type Type() const;
-
-    /// Get the number of arguments this instruction has.
-    [[nodiscard]] size_t NumArgs() const {
-        return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op);
-    }
-
-    /// Get the value of a given argument index.
-    [[nodiscard]] Value Arg(size_t index) const noexcept {
-        if (op == Opcode::Phi) {
-            return phi_args[index].second;
-        } else {
-            return args[index];
-        }
-    }
-
-    /// Set the value of a given argument index.
-    void SetArg(size_t index, Value value);
-
-    /// Get a pointer to the block of a phi argument.
-    [[nodiscard]] Block* PhiBlock(size_t index) const;
-    /// Add phi operand to a phi instruction.
-    void AddPhiOperand(Block* predecessor, const Value& value);
-
-    void Invalidate();
-    void ClearArgs();
-
-    void ReplaceUsesWith(Value replacement);
-
-    void ReplaceOpcode(IR::Opcode opcode);
-
-    template <typename FlagsType>
-    requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
-        [[nodiscard]] FlagsType Flags() const noexcept {
-        FlagsType ret;
-        std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
-        return ret;
-    }
-
-    template <typename FlagsType>
-    requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
-        [[nodiscard]] void SetFlags(FlagsType value) noexcept {
-        std::memcpy(&flags, &value, sizeof(value));
-    }
-
-    /// Intrusively store the host definition of this instruction.
-    template <typename DefinitionType>
-    void SetDefinition(DefinitionType def) {
-        definition = Common::BitCast<u32>(def);
-    }
-
-    /// Return the intrusively stored host definition of this instruction.
-    template <typename DefinitionType>
-    [[nodiscard]] DefinitionType Definition() const noexcept {
-        return Common::BitCast<DefinitionType>(definition);
-    }
-
-private:
-    struct NonTriviallyDummy {
-        NonTriviallyDummy() noexcept {}
-    };
-
-    void Use(const Value& value);
-    void UndoUse(const Value& value);
-
-    IR::Opcode op{};
-    int use_count{};
-    u32 flags{};
-    u32 definition{};
-    union {
-        NonTriviallyDummy dummy{};
-        boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
-        std::array<Value, 5> args;
-    };
-    std::unique_ptr<AssociatedInsts> associated_insts;
-};
-static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
-
-struct AssociatedInsts {
-    union {
-        Inst* in_bounds_inst;
-        Inst* sparse_inst;
-        Inst* zero_inst{};
-    };
-    Inst* sign_inst{};
-    Inst* carry_inst{};
-    Inst* overflow_inst{};
-};
-
-} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
index 89a17fb1b4..3fc06f8551 100644
--- a/src/shader_recompiler/frontend/ir/program.cpp
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -8,8 +8,8 @@
 #include <fmt/format.h>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index bf5f8c0c20..a8a919e0e6 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/opcodes.h"
 #include "shader_recompiler/frontend/ir/value.h"
 
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 3037455632..d90a68b377 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -4,19 +4,34 @@
 
 #pragma once
 
+#include <array>
+#include <cstring>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/intrusive/list.hpp>
+
+#include "common/bit_cast.h"
 #include "common/common_types.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/attribute.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/patch.h"
 #include "shader_recompiler/frontend/ir/pred.h"
 #include "shader_recompiler/frontend/ir/reg.h"
-#include "shader_recompiler/frontend/ir/patch.h"
 #include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
 
 class Block;
 class Inst;
 
+struct AssociatedInsts;
+
 class Value {
 public:
     Value() noexcept : type{IR::Type::Void}, inst{nullptr} {}
@@ -101,6 +116,140 @@ public:
     explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
 };
 
+class Inst : public boost::intrusive::list_base_hook<> {
+public:
+    explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
+    ~Inst();
+
+    Inst& operator=(const Inst&) = delete;
+    Inst(const Inst&) = delete;
+
+    Inst& operator=(Inst&&) = delete;
+    Inst(Inst&&) = delete;
+
+    /// Get the number of uses this instruction has.
+    [[nodiscard]] int UseCount() const noexcept {
+        return use_count;
+    }
+
+    /// Determines whether this instruction has uses or not.
+    [[nodiscard]] bool HasUses() const noexcept {
+        return use_count > 0;
+    }
+
+    /// Get the opcode this microinstruction represents.
+    [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
+        return op;
+    }
+
+    /// Determines if there is a pseudo-operation associated with this instruction.
+    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
+        return associated_insts != nullptr;
+    }
+
+    /// Determines whether or not this instruction may have side effects.
+    [[nodiscard]] bool MayHaveSideEffects() const noexcept;
+
+    /// Determines whether or not this instruction is a pseudo-instruction.
+    /// Pseudo-instructions depend on their parent instructions for their semantics.
+    [[nodiscard]] bool IsPseudoInstruction() const noexcept;
+
+    /// Determines if all arguments of this instruction are immediates.
+    [[nodiscard]] bool AreAllArgsImmediates() const;
+
+    /// Gets a pseudo-operation associated with this instruction.
+    [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
+
+    /// Get the type this instruction returns.
+    [[nodiscard]] IR::Type Type() const;
+
+    /// Get the number of arguments this instruction has.
+    [[nodiscard]] size_t NumArgs() const {
+        return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op);
+    }
+
+    /// Get the value of a given argument index.
+    [[nodiscard]] Value Arg(size_t index) const noexcept {
+        if (op == IR::Opcode::Phi) {
+            return phi_args[index].second;
+        } else {
+            return args[index];
+        }
+    }
+
+    /// Set the value of a given argument index.
+    void SetArg(size_t index, Value value);
+
+    /// Get a pointer to the block of a phi argument.
+    [[nodiscard]] Block* PhiBlock(size_t index) const;
+    /// Add phi operand to a phi instruction.
+    void AddPhiOperand(Block* predecessor, const Value& value);
+
+    void Invalidate();
+    void ClearArgs();
+
+    void ReplaceUsesWith(Value replacement);
+
+    void ReplaceOpcode(IR::Opcode opcode);
+
+    template <typename FlagsType>
+    requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+    [[nodiscard]] FlagsType Flags() const noexcept {
+        FlagsType ret;
+        std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
+        return ret;
+    }
+
+    template <typename FlagsType>
+    requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+    void SetFlags(FlagsType value) noexcept {
+        std::memcpy(&flags, &value, sizeof(value));
+    }
+
+    /// Intrusively store the host definition of this instruction.
+    template <typename DefinitionType>
+    void SetDefinition(DefinitionType def) {
+        definition = Common::BitCast<u32>(def);
+    }
+
+    /// Return the intrusively stored host definition of this instruction.
+    template <typename DefinitionType>
+    [[nodiscard]] DefinitionType Definition() const noexcept {
+        return Common::BitCast<DefinitionType>(definition);
+    }
+
+private:
+    struct NonTriviallyDummy {
+        NonTriviallyDummy() noexcept {}
+    };
+
+    void Use(const Value& value);
+    void UndoUse(const Value& value);
+
+    IR::Opcode op{};
+    int use_count{};
+    u32 flags{};
+    u32 definition{};
+    union {
+        NonTriviallyDummy dummy{};
+        boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
+        std::array<Value, 5> args;
+    };
+    std::unique_ptr<AssociatedInsts> associated_insts;
+};
+static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
+
+struct AssociatedInsts {
+    union {
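+        // These pseudo-ops are mutually exclusive on a single parent instruction,
+        // so they can share storage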
+        Inst* in_bounds_inst;
+        Inst* sparse_inst;
+        Inst* zero_inst{};
+    };
+    Inst* sign_inst{};
+    Inst* carry_inst{};
+    Inst* overflow_inst{};
+};
+
 using U1 = TypedValue<Type::U1>;
 using U8 = TypedValue<Type::U8>;
 using U16 = TypedValue<Type::U16>;
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
index e4797291e2..a6be12ba2e 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -11,7 +11,7 @@
 
 #include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/object_pool.h"
 
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index cccf0909de..bb4aeb57c9 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -3,9 +3,9 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/environment.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 #include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 #include "shader_recompiler/shader_info.h"
 
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 2206f93c27..770d3de61b 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -11,7 +11,7 @@
 #include "common/bit_util.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
index 8ad59f42e0..f9c5334b5d 100644
--- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
+++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
@@ -5,7 +5,7 @@
 #include <ranges>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index f294d297f6..87eca2a0df 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -15,7 +15,7 @@
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 38af72dfea..6afbe24f7f 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -5,7 +5,7 @@
 #include <vector>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 62e73d52d9..773e1f9618 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -5,7 +5,7 @@
 #include <algorithm>
 
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index bb1a90004f..fe86a164b7 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -23,10 +23,10 @@
 #include <boost/container/flat_set.hpp>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/opcodes.h"
 #include "shader_recompiler/frontend/ir/pred.h"
 #include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index dbec96d84a..62bf5f8ff0 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -6,7 +6,7 @@
 
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 8b6839966f..e12e4422f7 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -16,7 +16,7 @@
 #include "common/common_types.h"
 #include "common/thread_worker.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/object_pool.h"
 #include "shader_recompiler/profile.h"
-- 
cgit v1.2.3-70-g09d2


From f66851e37682ce538172b0945908227ada8d21ac Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 21 Apr 2021 02:43:44 -0300
Subject: shader: Use memset to reset instruction arguments

---
 src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 +++-
 src/shader_recompiler/frontend/ir/value.h              | 7 ++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 701746a0cc..e3350931b8 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -279,8 +279,10 @@ void Inst::ClearArgs() {
             if (!value.IsImmediate()) {
                 UndoUse(value);
             }
-            value = {};
         }
+        // Reset arguments to null
+        // std::memset was measured to be faster on MSVC than std::ranges::fill
+        std::memset(&args, 0, sizeof(args));
     }
 }
 
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 5425e42a1e..7b20824edc 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -34,7 +34,7 @@ struct AssociatedInsts;
 
 class Value {
 public:
-    Value() noexcept : type{IR::Type::Void}, inst{nullptr} {}
+    Value() noexcept = default;
     explicit Value(IR::Inst* value) noexcept;
     explicit Value(IR::Block* value) noexcept;
     explicit Value(IR::Reg value) noexcept;
@@ -78,9 +78,9 @@ public:
 private:
     void ValidateAccess(IR::Type expected) const;
 
-    IR::Type type;
+    IR::Type type{};
     union {
-        IR::Inst* inst;
+        IR::Inst* inst{};
         IR::Block* label;
         IR::Reg reg;
         IR::Pred pred;
@@ -95,6 +95,7 @@ private:
         f64 imm_f64;
     };
 };
+static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type::Void being zero");
 static_assert(std::is_trivially_copyable_v<Value>);
 
 template <IR::Type type_>
-- 
cgit v1.2.3-70-g09d2


From c8f9772d6590a018665d47a165951864ff783017 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 21 Apr 2021 06:10:04 -0300
Subject: shader: Fix gcc warnings

---
 src/shader_recompiler/frontend/ir/microinstruction.cpp             | 2 +-
 src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index e3350931b8..b424d038e5 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -282,7 +282,7 @@ void Inst::ClearArgs() {
         }
         // Reset arguments to null
         // std::memset was measured to be faster on MSVC than std::ranges::fill
-        std::memset(&args, 0, sizeof(args));
+        std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
     }
 }
 
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index b85b613f38..cc5410c6df 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -267,7 +267,7 @@ bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
     return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
 }
 
-bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
+[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
     Node it{goto_stmt};
     do {
         if (it == label_stmt) {
-- 
cgit v1.2.3-70-g09d2


From aece958c2ba5d4fe37246a6a7502d182931a7483 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 22 Apr 2021 16:50:13 -0300
Subject: shader: Add missing UndoUse case for GetSparseFromOp

---
 src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index b424d038e5..5c1b02d537 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -364,6 +364,10 @@ void Inst::UndoUse(const Value& value) {
         AllocAssociatedInsts(assoc_inst);
         RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
         break;
+    case Opcode::GetSparseFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp);
+        break;
     case Opcode::GetInBoundsFromOp:
         AllocAssociatedInsts(assoc_inst);
         RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp);
-- 
cgit v1.2.3-70-g09d2


From b541f5e5e333a8ec8c3569e02d67e59ad14217c2 Mon Sep 17 00:00:00 2001
From: FernandoS27 <fsahmkow27@gmail.com>
Date: Mon, 19 Apr 2021 01:03:38 +0200
Subject: shader: Implement VertexA stage

---
 src/shader_recompiler/CMakeLists.txt               |  1 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  1 +
 .../backend/spirv/emit_spirv_control_flow.cpp      |  4 ++
 .../frontend/ir/microinstruction.cpp               |  1 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |  1 +
 src/shader_recompiler/frontend/maxwell/program.cpp | 28 ++++++++
 src/shader_recompiler/frontend/maxwell/program.h   |  2 +
 src/shader_recompiler/ir_opt/dual_vertex_pass.cpp  | 74 ++++++++++++++++++++++
 .../global_memory_to_storage_buffer_pass.cpp       | 26 ++++++++
 src/shader_recompiler/ir_opt/passes.h              |  7 ++
 src/shader_recompiler/ir_opt/texture_pass.cpp      | 21 ++++++
 .../renderer_vulkan/vk_pipeline_cache.cpp          | 17 ++++-
 12 files changed, 180 insertions(+), 3 deletions(-)
 create mode 100644 src/shader_recompiler/ir_opt/dual_vertex_pass.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 051e5d05ad..151733090c 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -162,6 +162,7 @@ add_library(shader_recompiler STATIC
     ir_opt/collect_shader_info_pass.cpp
     ir_opt/constant_propagation_pass.cpp
     ir_opt/dead_code_elimination_pass.cpp
+    ir_opt/dual_vertex_pass.cpp
     ir_opt/global_memory_to_storage_buffer_pass.cpp
     ir_opt/identity_removal_pass.cpp
     ir_opt/lower_fp16_to_fp32.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 4562db45b4..c352bbd84f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -25,6 +25,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal
 void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
 void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
 void EmitReturn(EmitContext& ctx);
+void EmitJoin(EmitContext& ctx);
 void EmitUnreachable(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
 void EmitBarrier(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 335603f881..d3a1db340c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) {
     ctx.OpReturn();
 }
 
+void EmitJoin(EmitContext&) {
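+    // DualVertexJoinPass replaces every Join with a branch before SPIR-V emission,
+    // so reaching this function indicates a frontend bug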
+    throw NotImplementedException("Join shouldn't be emitted");
+}
+
 void EmitUnreachable(EmitContext& ctx) {
     ctx.OpUnreachable();
 }
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 5c1b02d537..dba9021869 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -61,6 +61,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::LoopMerge:
     case Opcode::SelectionMerge:
     case Opcode::Return:
+    case Opcode::Join:
     case Opcode::Unreachable:
     case Opcode::DemoteToHelperInvocation:
     case Opcode::Barrier:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 8f32c9e74b..b14719c515 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -13,6 +13,7 @@ OPCODE(BranchConditional,                                   Void,           U1,
 OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          )
 OPCODE(SelectionMerge,                                      Void,           Label,                                                                          )
 OPCODE(Return,                                              Void,                                                                                           )
+OPCODE(Join,                                                Void,                                                                                           )
 OPCODE(Unreachable,                                         Void,                                                                                           )
 OPCODE(DemoteToHelperInvocation,                            Void,           Label,                                                                          )
 
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index aee96eae38..59897cb3e1 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -150,4 +150,32 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     return program;
 }
 
+IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+                                    Environment& env2) {
+    IR::Program program{};
+    Optimization::VertexATransformPass(vertex_a);
+    Optimization::VertexBTransformPass(vertex_b);
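+    // Concatenate the block lists: vertex A's blocks execute first, then vertex B's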
+    program.blocks.swap(vertex_a.blocks);
+    for (IR::Block* block : vertex_b.blocks) {
+        program.blocks.push_back(block);
+    }
+    program.stage = Stage::VertexB;
+    program.info = vertex_a.info;
+    program.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
+
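+    // Merge per-generic attribute usage from both vertex stages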
+    for (size_t index = 0; index < 32; index++) {
+        program.info.input_generics[index].used |= vertex_b.info.input_generics[index].used;
+        program.info.stores_generics[index] |= vertex_b.info.stores_generics[index];
+    }
+    Optimization::JoinTextureInfo(program.info, vertex_b.info);
+    Optimization::JoinStorageInfo(program.info, vertex_b.info);
+    Optimization::DualVertexJoinPass(program);
+    program.post_order_blocks = PostOrder(program.blocks);
+    Optimization::DeadCodeEliminationPass(program);
+    Optimization::IdentityRemovalPass(program);
+    Optimization::VerificationPass(program);
+    Optimization::CollectShaderInfoPass(env2, program);
+    return program;
+}
+
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h
index 542621a1de..6e5d5ddd0a 100644
--- a/src/shader_recompiler/frontend/maxwell/program.h
+++ b/src/shader_recompiler/frontend/maxwell/program.h
@@ -21,4 +21,6 @@ namespace Shader::Maxwell {
                                            ObjectPool<IR::Block>& block_pool, Environment& env,
                                            Flow::CFG& cfg);
 
+[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+                                                  Environment& env_vertex_b);
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
new file mode 100644
index 0000000000..f35c6478ac
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
@@ -0,0 +1,74 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <ranges>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void VertexATransformPass(IR::Program& program) {
+    bool replaced_join{};
+    bool eliminated_epilogue{};
+    for (IR::Block* const block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::Return:
+                inst.ReplaceOpcode(IR::Opcode::Join);
+                replaced_join = true;
+                break;
+            case IR::Opcode::Epilogue:
+                inst.Invalidate();
+                eliminated_epilogue = true;
+                break;
+            default:
+                break;
+            }
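+            // Stop once both the Return and the Epilogue have been rewritten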
+            if (replaced_join && eliminated_epilogue) {
+                return;
+            }
+        }
+    }
+}
+
+void VertexBTransformPass(IR::Program& program) {
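+    // Vertex A's prologue already runs first in the merged program, so drop vertex B's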
+    for (IR::Block* const block : program.post_order_blocks | std::views::reverse) {
+        for (IR::Inst& inst : block->Instructions()) {
+            if (inst.GetOpcode() == IR::Opcode::Prologue) {
+                return inst.Invalidate();
+            }
+        }
+    }
+}
+
+void DualVertexJoinPass(IR::Program& program) {
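+    // Replace the single Join left by vertex A with a branch to the block that follows it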
+    const auto& blocks = program.blocks;
+    const s64 last_index = static_cast<s64>(blocks.size()) - 1;
+    if (last_index < 1) {
+        throw NotImplementedException("Dual Vertex Join pass failed, expected at least 2 blocks!");
+    }
+    for (s64 index = 0; index < last_index; index++) {
+        IR::Block* const current_block = blocks[index];
+        IR::Block* const next_block = blocks[index + 1];
+        for (IR::Inst& inst : current_block->Instructions()) {
+            if (inst.GetOpcode() == IR::Opcode::Join) {
+                IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)};
+                ir.Branch(next_block);
+                inst.Invalidate();
+                // Only one Join instruction should exist in the merged program
+                return;
+            }
+        }
+    }
+    throw NotImplementedException("Dual Vertex Join pass failed, no join present!");
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 87eca2a0df..1d11a00d8a 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -499,4 +499,30 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
     }
 }
 
+template <typename Descriptors, typename Descriptor, typename Func>
+static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
+    // TODO: Handle arrays
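+    // Return the index of an existing descriptor matching pred, or append desc and return its index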
+    const auto it{std::ranges::find_if(descriptors, pred)};
+    if (it != descriptors.end()) {
+        return static_cast<u32>(std::distance(descriptors.begin(), it));
+    }
+    descriptors.push_back(desc);
+    return static_cast<u32>(descriptors.size()) - 1;
+}
+
+void JoinStorageInfo(Info& base, Info& source) {
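+    // Deduplicate descriptors by (cbuf_index, cbuf_offset, count) and accumulate the written flag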
+    auto& descriptors = base.storage_buffers_descriptors;
+    for (auto& desc : source.storage_buffers_descriptors) {
+        auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) {
+            return desc.cbuf_index == existing.cbuf_index &&
+                   desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count;
+        })};
+        if (it != descriptors.end()) {
+            it->is_written |= desc.is_written;
+            continue;
+        }
+        descriptors.push_back(desc);
+    }
+}
+
 } // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 186104713d..e9cb8546a9 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -22,4 +22,11 @@ void SsaRewritePass(IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program);
 void VerificationPass(const IR::Program& program);
 
+// Dual Vertex
+void VertexATransformPass(IR::Program& program);
+void VertexBTransformPass(IR::Program& program);
+void DualVertexJoinPass(IR::Program& program);
+void JoinTextureInfo(Info& base, Info& source);
+void JoinStorageInfo(Info& base, Info& source);
+
 } // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index cfa6b34b9a..2b38bcf423 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -426,4 +426,25 @@ void TexturePass(Environment& env, IR::Program& program) {
     }
 }
 
+void JoinTextureInfo(Info& base, Info& source) {
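+    // Funnel vertex B's descriptors through the texture pass deduplication logic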
+    Descriptors descriptors{
+        base.texture_buffer_descriptors,
+        base.image_buffer_descriptors,
+        base.texture_descriptors,
+        base.image_descriptors,
+    };
+    for (auto& desc : source.texture_buffer_descriptors) {
+        descriptors.Add(desc);
+    }
+    for (auto& desc : source.image_buffer_descriptors) {
+        descriptors.Add(desc);
+    }
+    for (auto& desc : source.texture_descriptors) {
+        descriptors.Add(desc);
+    }
+    for (auto& desc : source.image_descriptors) {
+        descriptors.Add(desc);
+    }
+}
+
 } // namespace Shader::Optimization
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 0822862fe8..638475251e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -47,6 +47,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
 namespace {
 using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::MergeDualVertexPrograms;
 using Shader::Maxwell::TranslateProgram;
 using VideoCommon::ComputeEnvironment;
 using VideoCommon::FileEnvironment;
@@ -287,22 +288,32 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
     LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
     size_t env_index{0};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+    bool uses_vertex_a{};
+    size_t start_value_processing{};
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         if (key.unique_hashes[index] == 0) {
             continue;
         }
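+        // Program index 0 is the VertexA stage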
+        uses_vertex_a |= index == 0;
         Shader::Environment& env{*envs[env_index]};
         ++env_index;
 
         const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
-        Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
-        programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
+        Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+        if (!uses_vertex_a || index != 1) {
+            programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
+            continue;
+        }
+        Shader::IR::Program& program_va{programs[0]};
+        Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)};
+        programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
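+        // The merged program lives in the VertexB slot, so skip slot 0 when building stages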
+        start_value_processing = 1;
     }
     std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
     std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
 
     u32 binding{0};
-    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+    for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) {
         if (key.unique_hashes[index] == 0) {
             continue;
         }
-- 
cgit v1.2.3-70-g09d2


From 7ecc6de56ae01602b25408db8b6658d7a41a419a Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Fri, 23 Apr 2021 17:47:54 -0400
Subject: shader: Implement Int32 SUATOM/SURED

---
 src/shader_recompiler/CMakeLists.txt               |   2 +
 .../backend/spirv/emit_context.cpp                 |   3 +
 src/shader_recompiler/backend/spirv/emit_context.h |   1 +
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |   3 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  44 +++++
 .../backend/spirv/emit_spirv_image_atomic.cpp      | 182 ++++++++++++++++++
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  89 +++++++++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  26 +++
 .../frontend/ir/microinstruction.cpp               |  33 ++++
 src/shader_recompiler/frontend/ir/opcodes.inc      |  38 ++++
 src/shader_recompiler/frontend/maxwell/maxwell.inc |   3 +-
 .../frontend/maxwell/translate/impl/impl.h         |   1 +
 .../maxwell/translate/impl/not_implemented.cpp     |   4 -
 .../translate/impl/surface_atomic_operations.cpp   | 204 +++++++++++++++++++++
 .../ir_opt/collect_shader_info_pass.cpp            |  36 ++++
 src/shader_recompiler/ir_opt/texture_pass.cpp      |  68 ++++++-
 src/shader_recompiler/shader_info.h                |   2 +
 17 files changed, 733 insertions(+), 6 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index f20031d98d..0bcd714d6e 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -12,6 +12,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_convert.cpp
     backend/spirv/emit_spirv_floating_point.cpp
     backend/spirv/emit_spirv_image.cpp
+    backend/spirv/emit_spirv_image_atomic.cpp
     backend/spirv/emit_spirv_integer.cpp
     backend/spirv/emit_spirv_logical.cpp
     backend/spirv/emit_spirv_memory.cpp
@@ -138,6 +139,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/predicate_set_predicate.cpp
     frontend/maxwell/translate/impl/predicate_set_register.cpp
     frontend/maxwell/translate/impl/select_source_with_predicate.cpp
+    frontend/maxwell/translate/impl/surface_atomic_operations.cpp
     frontend/maxwell/translate/impl/surface_load_store.cpp
     frontend/maxwell/translate/impl/texture_fetch.cpp
     frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index e9ffe4955c..549df0d4bd 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -1107,6 +1107,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
         }
         ++binding;
     }
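+    // Texel pointer type used by OpImageTexelPointer for u32 image atomics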
+    if (info.uses_atomic_image_u32) {
+        image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
+    }
 }
 
 void EmitContext::DefineImages(const Info& info, u32& binding) {
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 823ed8525b..30b08104dd 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -198,6 +198,7 @@ public:
 
     Id image_buffer_type{};
     Id sampled_texture_buffer_type{};
+    Id image_u32{};
 
     std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
     std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 745a834e3e..3f9adc902d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -335,6 +335,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
     if (info.uses_typeless_image_writes) {
         ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
     }
+    if (info.uses_image_buffers) {
+        ctx.AddCapability(spv::Capability::ImageBuffer);
+    }
     if (info.uses_sample_id) {
         ctx.AddCapability(spv::Capability::SampleRateShading);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 8f6482b7b9..47d62b190a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -509,6 +509,50 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
                      Id derivates, Id offset, Id lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
+Id EmitBindlessImageAtomicIAdd32(EmitContext&);
+Id EmitBindlessImageAtomicSMin32(EmitContext&);
+Id EmitBindlessImageAtomicUMin32(EmitContext&);
+Id EmitBindlessImageAtomicSMax32(EmitContext&);
+Id EmitBindlessImageAtomicUMax32(EmitContext&);
+Id EmitBindlessImageAtomicInc32(EmitContext&);
+Id EmitBindlessImageAtomicDec32(EmitContext&);
+Id EmitBindlessImageAtomicAnd32(EmitContext&);
+Id EmitBindlessImageAtomicOr32(EmitContext&);
+Id EmitBindlessImageAtomicXor32(EmitContext&);
+Id EmitBindlessImageAtomicExchange32(EmitContext&);
+Id EmitBoundImageAtomicIAdd32(EmitContext&);
+Id EmitBoundImageAtomicSMin32(EmitContext&);
+Id EmitBoundImageAtomicUMin32(EmitContext&);
+Id EmitBoundImageAtomicSMax32(EmitContext&);
+Id EmitBoundImageAtomicUMax32(EmitContext&);
+Id EmitBoundImageAtomicInc32(EmitContext&);
+Id EmitBoundImageAtomicDec32(EmitContext&);
+Id EmitBoundImageAtomicAnd32(EmitContext&);
+Id EmitBoundImageAtomicOr32(EmitContext&);
+Id EmitBoundImageAtomicXor32(EmitContext&);
+Id EmitBoundImageAtomicExchange32(EmitContext&);
+Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value);
+Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value);
+Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value);
+Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                       Id value);
+Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value);
+Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                             Id value);
 Id EmitLaneId(EmitContext& ctx);
 Id EmitVoteAll(EmitContext& ctx, Id pred);
 Id EmitVoteAny(EmitContext& ctx, Id pred);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
new file mode 100644
index 0000000000..05bed22b90
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+    if (!index.IsImmediate()) {
+        throw NotImplementedException("Indirect image indexing");
+    }
+    if (info.type == TextureType::Buffer) {
+        const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())};
+        return def.id;
+    } else {
+        const ImageDefinition def{ctx.images.at(index.U32())};
+        return def.id;
+    }
+}
+
+std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
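+    // Device scope with relaxed (zero) memory semantics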
+    const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
+    const Id semantics{ctx.u32_zero_value};
+    return {scope, semantics};
+}
+
+Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value,
+                  Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const Id image{Image(ctx, index, info)};
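+    // SPIR-V atomics operate on a pointer to the texel rather than on the image handle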
+    const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+} // Anonymous namespace
+
+Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin);
+}
+
+Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
+    // TODO: This is not yet implemented
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
+    // TODO: This is not yet implemented
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd);
+}
+
+Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                       Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr);
+}
+
+Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor);
+}
+
+Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                             Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange);
+}
+
+Id EmitBindlessImageAtomicIAdd32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicSMin32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicUMin32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicSMax32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicUMax32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicInc32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicDec32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicAnd32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicOr32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicXor32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicExchange32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicIAdd32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicSMin32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicUMin32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicSMax32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicUMax32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicInc32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicDec32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicAnd32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicOr32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicXor32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicExchange32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 5913fdeffa..354d72c9b2 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1869,6 +1869,95 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value
     Inst(op, Flags{info}, handle, coords, color);
 }
 
+Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
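+    // Immediate handles use the bound (constant-buffer) variant; dynamic handles go bindless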
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32
+                                         : Opcode::BindlessImageAtomicIAdd32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32
+                                         : Opcode::BindlessImageAtomicSMin32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32
+                                         : Opcode::BindlessImageAtomicUMin32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
+                                 bool is_signed, TextureInstInfo info) {
+    return is_signed ? ImageAtomicSMin(handle, coords, value, info)
+                     : ImageAtomicUMin(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32
+                                         : Opcode::BindlessImageAtomicSMax32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32
+                                         : Opcode::BindlessImageAtomicUMax32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
+                                 bool is_signed, TextureInstInfo info) {
+    return is_signed ? ImageAtomicSMax(handle, coords, value, info)
+                     : ImageAtomicUMax(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32
+                                         : Opcode::BindlessImageAtomicInc32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32
+                                         : Opcode::BindlessImageAtomicDec32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32
+                                         : Opcode::BindlessImageAtomicAnd32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+                               TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32
+                                         : Opcode::BindlessImageAtomicOr32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+                                TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32
+                                         : Opcode::BindlessImageAtomicXor32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
+                                     TextureInstInfo info) {
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32
+                                         : Opcode::BindlessImageAtomicExchange32};
+    return Inst(op, Flags{info}, handle, coords, value);
+}
+
 U1 IREmitter::VoteAll(const U1& value) {
     return Inst<U1>(Opcode::VoteAll, value);
 }
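
Each emitter above picks the Bound or Bindless opcode purely from whether the
handle is an immediate. A hypothetical call site in a translator, mirroring the
SUATOM lowering further down in this commit (register and variable names are
assumptions):

    IR::TextureInstInfo info{};
    info.type.Assign(TextureType::Color1D);
    info.image_format.Assign(ImageFormat::R32_UINT);
    // Image atomics return the value the texel held before the update.
    const IR::Value old_value{v.ir.ImageAtomicIAdd(handle, coords, operand, info)};
    v.X(dest_reg, IR::U32{old_value});
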
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index a12919283f..4e614d424f 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -334,6 +334,32 @@ public:
     [[nodiscard]] void ImageWrite(const Value& handle, const Value& coords, const Value& color,
                                   TextureInstInfo info);
 
+    [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+                                      TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
+                                            const Value& value, TextureInstInfo info);
     [[nodiscard]] U1 VoteAll(const U1& value);
     [[nodiscard]] U1 VoteAny(const U1& value);
     [[nodiscard]] U1 VoteEqual(const U1& value);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index dba9021869..616ef17d4e 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -166,6 +166,39 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::BindlessImageWrite:
     case Opcode::BoundImageWrite:
     case Opcode::ImageWrite:
+    case IR::Opcode::BindlessImageAtomicIAdd32:
+    case IR::Opcode::BindlessImageAtomicSMin32:
+    case IR::Opcode::BindlessImageAtomicUMin32:
+    case IR::Opcode::BindlessImageAtomicSMax32:
+    case IR::Opcode::BindlessImageAtomicUMax32:
+    case IR::Opcode::BindlessImageAtomicInc32:
+    case IR::Opcode::BindlessImageAtomicDec32:
+    case IR::Opcode::BindlessImageAtomicAnd32:
+    case IR::Opcode::BindlessImageAtomicOr32:
+    case IR::Opcode::BindlessImageAtomicXor32:
+    case IR::Opcode::BindlessImageAtomicExchange32:
+    case IR::Opcode::BoundImageAtomicIAdd32:
+    case IR::Opcode::BoundImageAtomicSMin32:
+    case IR::Opcode::BoundImageAtomicUMin32:
+    case IR::Opcode::BoundImageAtomicSMax32:
+    case IR::Opcode::BoundImageAtomicUMax32:
+    case IR::Opcode::BoundImageAtomicInc32:
+    case IR::Opcode::BoundImageAtomicDec32:
+    case IR::Opcode::BoundImageAtomicAnd32:
+    case IR::Opcode::BoundImageAtomicOr32:
+    case IR::Opcode::BoundImageAtomicXor32:
+    case IR::Opcode::BoundImageAtomicExchange32:
+    case IR::Opcode::ImageAtomicIAdd32:
+    case IR::Opcode::ImageAtomicSMin32:
+    case IR::Opcode::ImageAtomicUMin32:
+    case IR::Opcode::ImageAtomicSMax32:
+    case IR::Opcode::ImageAtomicUMax32:
+    case IR::Opcode::ImageAtomicInc32:
+    case IR::Opcode::ImageAtomicDec32:
+    case IR::Opcode::ImageAtomicAnd32:
+    case IR::Opcode::ImageAtomicOr32:
+    case IR::Opcode::ImageAtomicXor32:
+    case IR::Opcode::ImageAtomicExchange32:
         return true;
     default:
         return false;
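
Every image atomic has to be listed here because dead-code elimination keys off
this predicate: an atomic whose result is never read still performs a memory
update that must survive. A minimal sketch of the guard, assuming the usual
shape of such a pass:

    // Sketch: only instructions with no uses and no side effects may be dropped.
    for (IR::Inst& inst : block.Instructions()) {
        if (!inst.HasUses() && !inst.MayHaveSideEffects()) {
            inst.Invalidate();
        }
    }
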
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index b14719c515..9165421f89 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -496,6 +496,44 @@ OPCODE(ImageGradient,                                       F32x4,          Opaq
 OPCODE(ImageRead,                                           U32x4,          Opaque,         Opaque,                                                         )
 OPCODE(ImageWrite,                                          Void,           Opaque,         Opaque,         U32x4,                                          )
 
+// Atomic Image operations
+
+OPCODE(BindlessImageAtomicIAdd32,                           U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicSMin32,                           U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicUMin32,                           U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicSMax32,                           U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicUMax32,                           U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicInc32,                            U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicDec32,                            U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicAnd32,                            U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicOr32,                             U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicXor32,                            U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BindlessImageAtomicExchange32,                       U32,            U32,            Opaque,         U32,                                            )
+
+OPCODE(BoundImageAtomicIAdd32,                              U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicSMin32,                              U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicUMin32,                              U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicSMax32,                              U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicUMax32,                              U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicInc32,                               U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicDec32,                               U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicAnd32,                               U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicOr32,                                U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicXor32,                               U32,            U32,            Opaque,         U32,                                            )
+OPCODE(BoundImageAtomicExchange32,                          U32,            U32,            Opaque,         U32,                                            )
+
+OPCODE(ImageAtomicIAdd32,                                   U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicSMin32,                                   U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicUMin32,                                   U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicSMax32,                                   U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicUMax32,                                   U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicInc32,                                    U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicDec32,                                    U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicAnd32,                                    U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicOr32,                                     U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicXor32,                                    U32,            Opaque,         Opaque,         U32,                                            )
+OPCODE(ImageAtomicExchange32,                               U32,            Opaque,         Opaque,         U32,                                            )
+
 // Warp operations
 OPCODE(LaneId,                                              U32,                                                                                            )
 OPCODE(VoteAll,                                             U1,             U1,                                                                             )
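
opcodes.inc is an X-macro table: each consumer defines OPCODE to extract the
columns it needs, so the rows above are enough to declare the new IR operations
everywhere. A sketch of the typical enum-generating consumer:

    // Sketch: generate the Opcode enum from the table.
    enum class Opcode {
    #define OPCODE(name, result_type, ...) name,
    #include "shader_recompiler/frontend/ir/opcodes.inc"
    #undef OPCODE
    };
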
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
index c759bd4d4c..2fee591bbb 100644
--- a/src/shader_recompiler/frontend/maxwell/maxwell.inc
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -244,7 +244,8 @@ INST(STG,          "STG",            "1110 1110 1101 1---")
 INST(STL,          "STL",            "1110 1111 0101 0---")
 INST(STP,          "STP",            "1110 1110 1010 0---")
 INST(STS,          "STS",            "1110 1111 0101 1---")
-INST(SUATOM_cas,   "SUATOM",         "1110 1010 ---- ----")
+INST(SUATOM,       "SUATOM",         "1110 1010 0--- ----")
+INST(SUATOM_cas,   "SUATOM_cas",     "1110 1010 1--- ----")
 INST(SULD,         "SULD",           "1110 1011 000- ----")
 INST(SURED,        "SURED",          "1110 1011 010- ----")
 INST(SUST,         "SUST",           "1110 1011 001- ----")
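
The old SUATOM_cas pattern claimed the whole 1110 1010 block; the split above
spends one more fixed opcode bit to separate the plain atomic from the
compare-and-swap form. Decoders conventionally expand such strings into
mask/expected pairs, roughly:

    // Sketch: '0'/'1' columns are checked bits, '-' columns are don't-care.
    constexpr u64 mask{0xFF80ULL << 48};        // nine fixed bits
    constexpr u64 suatom{0xEA00ULL << 48};      // "1110 1010 0--- ----"
    constexpr u64 suatom_cas{0xEA80ULL << 48};  // "1110 1010 1--- ----"
    const bool is_plain_atomic{(insn & mask) == suatom};
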
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index bf7d1bae87..335e4f24fc 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -303,6 +303,7 @@ public:
     void STL(u64 insn);
     void STP(u64 insn);
     void STS(u64 insn);
+    void SUATOM(u64 insn);
     void SUATOM_cas(u64 insn);
     void SULD(u64 insn);
     void SURED(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index a4f99bbbe9..7e26ab3594 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -249,10 +249,6 @@ void TranslatorVisitor::SUATOM_cas(u64) {
     ThrowNotImplemented(Opcode::SUATOM_cas);
 }
 
-void TranslatorVisitor::SURED(u64) {
-    ThrowNotImplemented(Opcode::SURED);
-}
-
 void TranslatorVisitor::SYNC(u64) {
     ThrowNotImplemented(Opcode::SYNC);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 0000000000..994bdc3ebf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,204 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+    _1D,
+    BUFFER_1D,
+    ARRAY_1D,
+    _2D,
+    ARRAY_2D,
+    _3D,
+};
+
+enum class Size : u64 {
+    U32,
+    S32,
+    U64,
+    S64,
+    F32FTZRN,
+    F16x2FTZRN,
+    SD32,
+    SD64,
+};
+
+enum class AtomicOp : u64 {
+    ADD,
+    MIN,
+    MAX,
+    INC,
+    DEC,
+    AND,
+    OR,
+    XOR,
+    EXCH,
+};
+
+enum class Clamp : u64 {
+    IGN,
+    Default,
+    TRAP,
+};
+
+TextureType GetType(Type type) {
+    switch (type) {
+    case Type::_1D:
+        return TextureType::Color1D;
+    case Type::BUFFER_1D:
+        return TextureType::Buffer;
+    case Type::ARRAY_1D:
+        return TextureType::ColorArray1D;
+    case Type::_2D:
+        return TextureType::Color2D;
+    case Type::ARRAY_2D:
+        return TextureType::ColorArray2D;
+    case Type::_3D:
+        return TextureType::Color3D;
+    }
+    throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
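+    // Array surfaces pack the layer index in the low 16 bits of a coordinate
+    // register; the helper below extracts it once array types are implemented.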
+    [[maybe_unused]] const auto array{[&](int index) {
+        return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
+    }};
+    switch (type) {
+    case Type::_1D:
+    case Type::BUFFER_1D:
+        return v.X(reg);
+    default:
+        break;
+    }
+    throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
+                        const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
+                        bool is_signed) {
+    switch (op) {
+    case AtomicOp::ADD:
+        return ir.ImageAtomicIAdd(handle, coords, op_b, info);
+    case AtomicOp::MIN:
+        return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
+    case AtomicOp::MAX:
+        return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
+    case AtomicOp::INC:
+        return ir.ImageAtomicInc(handle, coords, op_b, info);
+    case AtomicOp::DEC:
+        return ir.ImageAtomicDec(handle, coords, op_b, info);
+    case AtomicOp::AND:
+        return ir.ImageAtomicAnd(handle, coords, op_b, info);
+    case AtomicOp::OR:
+        return ir.ImageAtomicOr(handle, coords, op_b, info);
+    case AtomicOp::XOR:
+        return ir.ImageAtomicXor(handle, coords, op_b, info);
+    case AtomicOp::EXCH:
+        return ir.ImageAtomicExchange(handle, coords, op_b, info);
+    default:
+        throw NotImplementedException("Atomic Operation {}", op);
+    }
+}
+
+ImageFormat Format(Size size) {
+    switch (size) {
+    case Size::U32:
+    case Size::S32:
+    case Size::SD32:
+        return ImageFormat::R32_UINT;
+    default:
+        break;
+    }
+    throw NotImplementedException("Invalid size {}", size);
+}
+
+bool IsSizeInt32(Size size) {
+    switch (size) {
+    case Size::U32:
+    case Size::S32:
+    case Size::SD32:
+        return true;
+    default:
+        return false;
+    }
+}
+
+void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
+                 IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
+                 u64 bound_offset, bool is_bindless, bool write_result) {
+    if (clamp != Clamp::IGN) {
+        throw NotImplementedException("Clamp {}", clamp);
+    }
+    if (!IsSizeInt32(size)) {
+        throw NotImplementedException("Size {}", size);
+    }
+    const bool is_signed{size == Size::S32};
+    const ImageFormat format{Format(size)};
+    const TextureType tex_type{GetType(type)};
+    const IR::Value coords{MakeCoords(v, coord_reg, type)};
+
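+    // Bound surfaces encode an immediate constant buffer offset (scaled by 4
+    // below); bindless surfaces read the handle from a register instead.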
+    const IR::U32 handle{is_bindless ? v.X(bindless_reg)
+                                     : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
+    IR::TextureInstInfo info{};
+    info.type.Assign(tex_type);
+    info.image_format.Assign(format);
+
+    // TODO: float/64-bit operand
+    const IR::Value op_b{v.X(operand_reg)};
+    const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
+
+    if (write_result) {
+        v.X(dest_reg, IR::U32{color});
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SUATOM(u64 insn) {
+    union {
+        u64 raw;
+        BitField<54, 1, u64> is_bindless;
+        BitField<29, 4, AtomicOp> op;
+        BitField<33, 3, Type> type;
+        BitField<51, 3, Size> size;
+        BitField<49, 2, Clamp> clamp;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> coord_reg;
+        BitField<20, 8, IR::Reg> operand_reg;
+        BitField<36, 13, u64> bound_offset;    // !is_bindless
+        BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
+    } const suatom{insn};
+
+    ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
+                suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
+                suatom.is_bindless != 0, true);
+}
+
+void TranslatorVisitor::SURED(u64 insn) {
+    // TODO: confirm offsets
+    union {
+        u64 raw;
+        BitField<51, 1, u64> is_bound;
+        BitField<21, 3, AtomicOp> op;
+        BitField<33, 3, Type> type;
+        BitField<20, 3, Size> size;
+        BitField<49, 2, Clamp> clamp;
+        BitField<0, 8, IR::Reg> operand_reg;
+        BitField<8, 8, IR::Reg> coord_reg;
+        BitField<36, 13, u64> bound_offset;    // is_bound
+        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+    } const sured{insn};
+    ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
+                sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
+                sured.is_bound == 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index bb4aeb57c9..7d8794a7e7 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -565,6 +565,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::ImageWrite: {
         const auto flags{inst.Flags<IR::TextureInstInfo>()};
         info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless;
+        info.uses_image_buffers |= flags.type == TextureType::Buffer;
         break;
     }
     case IR::Opcode::SubgroupEqMask:
@@ -696,6 +697,41 @@ void VisitUsages(Info& info, IR::Inst& inst) {
         info.used_storage_buffer_types |= IR::Type::U64;
         info.uses_int64_bit_atomics = true;
         break;
+    case IR::Opcode::BindlessImageAtomicIAdd32:
+    case IR::Opcode::BindlessImageAtomicSMin32:
+    case IR::Opcode::BindlessImageAtomicUMin32:
+    case IR::Opcode::BindlessImageAtomicSMax32:
+    case IR::Opcode::BindlessImageAtomicUMax32:
+    case IR::Opcode::BindlessImageAtomicInc32:
+    case IR::Opcode::BindlessImageAtomicDec32:
+    case IR::Opcode::BindlessImageAtomicAnd32:
+    case IR::Opcode::BindlessImageAtomicOr32:
+    case IR::Opcode::BindlessImageAtomicXor32:
+    case IR::Opcode::BindlessImageAtomicExchange32:
+    case IR::Opcode::BoundImageAtomicIAdd32:
+    case IR::Opcode::BoundImageAtomicSMin32:
+    case IR::Opcode::BoundImageAtomicUMin32:
+    case IR::Opcode::BoundImageAtomicSMax32:
+    case IR::Opcode::BoundImageAtomicUMax32:
+    case IR::Opcode::BoundImageAtomicInc32:
+    case IR::Opcode::BoundImageAtomicDec32:
+    case IR::Opcode::BoundImageAtomicAnd32:
+    case IR::Opcode::BoundImageAtomicOr32:
+    case IR::Opcode::BoundImageAtomicXor32:
+    case IR::Opcode::BoundImageAtomicExchange32:
+    case IR::Opcode::ImageAtomicIAdd32:
+    case IR::Opcode::ImageAtomicSMin32:
+    case IR::Opcode::ImageAtomicUMin32:
+    case IR::Opcode::ImageAtomicSMax32:
+    case IR::Opcode::ImageAtomicUMax32:
+    case IR::Opcode::ImageAtomicInc32:
+    case IR::Opcode::ImageAtomicDec32:
+    case IR::Opcode::ImageAtomicAnd32:
+    case IR::Opcode::ImageAtomicOr32:
+    case IR::Opcode::ImageAtomicXor32:
+    case IR::Opcode::ImageAtomicExchange32:
+        info.uses_atomic_image_u32 = true;
+        break;
     default:
         break;
     }
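
The single uses_atomic_image_u32 flag gives backends one switch for whatever
capability or option 32-bit image atomics need, in the same style as the other
uses_* flags. A sketch modeled on the GLASM SetupOptions function that appears
later in this series; the option string is invented for illustration:

    // Sketch only: hypothetical option name.
    if (info.uses_atomic_image_u32) {
        header += "OPTION NV_hypothetical_image_atomics;";
    }
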
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 2b38bcf423..9e0a2fb09f 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -76,6 +76,39 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) {
     case IR::Opcode::BoundImageWrite:
     case IR::Opcode::BindlessImageWrite:
         return IR::Opcode::ImageWrite;
+    case IR::Opcode::BoundImageAtomicIAdd32:
+    case IR::Opcode::BindlessImageAtomicIAdd32:
+        return IR::Opcode::ImageAtomicIAdd32;
+    case IR::Opcode::BoundImageAtomicSMin32:
+    case IR::Opcode::BindlessImageAtomicSMin32:
+        return IR::Opcode::ImageAtomicSMin32;
+    case IR::Opcode::BoundImageAtomicUMin32:
+    case IR::Opcode::BindlessImageAtomicUMin32:
+        return IR::Opcode::ImageAtomicUMin32;
+    case IR::Opcode::BoundImageAtomicSMax32:
+    case IR::Opcode::BindlessImageAtomicSMax32:
+        return IR::Opcode::ImageAtomicSMax32;
+    case IR::Opcode::BoundImageAtomicUMax32:
+    case IR::Opcode::BindlessImageAtomicUMax32:
+        return IR::Opcode::ImageAtomicUMax32;
+    case IR::Opcode::BoundImageAtomicInc32:
+    case IR::Opcode::BindlessImageAtomicInc32:
+        return IR::Opcode::ImageAtomicInc32;
+    case IR::Opcode::BoundImageAtomicDec32:
+    case IR::Opcode::BindlessImageAtomicDec32:
+        return IR::Opcode::ImageAtomicDec32;
+    case IR::Opcode::BoundImageAtomicAnd32:
+    case IR::Opcode::BindlessImageAtomicAnd32:
+        return IR::Opcode::ImageAtomicAnd32;
+    case IR::Opcode::BoundImageAtomicOr32:
+    case IR::Opcode::BindlessImageAtomicOr32:
+        return IR::Opcode::ImageAtomicOr32;
+    case IR::Opcode::BoundImageAtomicXor32:
+    case IR::Opcode::BindlessImageAtomicXor32:
+        return IR::Opcode::ImageAtomicXor32;
+    case IR::Opcode::BoundImageAtomicExchange32:
+    case IR::Opcode::BindlessImageAtomicExchange32:
+        return IR::Opcode::ImageAtomicExchange32;
     default:
         return IR::Opcode::Void;
     }
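
IndexedInstruction maps each Bound/Bindless atomic onto its descriptor-indexed
form. Once the pass has resolved the constant buffer, it rewrites the
instruction in place, along the lines of:

    // Sketch of the usual rewrite shape, once the descriptor index is known:
    inst->ReplaceOpcode(IndexedInstruction(*inst));
    inst->SetArg(0, IR::Value{index});  // the handle becomes a descriptor index
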
@@ -95,6 +128,17 @@ bool IsBindless(const IR::Inst& inst) {
     case IR::Opcode::BindlessImageGradient:
     case IR::Opcode::BindlessImageRead:
     case IR::Opcode::BindlessImageWrite:
+    case IR::Opcode::BindlessImageAtomicIAdd32:
+    case IR::Opcode::BindlessImageAtomicSMin32:
+    case IR::Opcode::BindlessImageAtomicUMin32:
+    case IR::Opcode::BindlessImageAtomicSMax32:
+    case IR::Opcode::BindlessImageAtomicUMax32:
+    case IR::Opcode::BindlessImageAtomicInc32:
+    case IR::Opcode::BindlessImageAtomicDec32:
+    case IR::Opcode::BindlessImageAtomicAnd32:
+    case IR::Opcode::BindlessImageAtomicOr32:
+    case IR::Opcode::BindlessImageAtomicXor32:
+    case IR::Opcode::BindlessImageAtomicExchange32:
         return true;
     case IR::Opcode::BoundImageSampleImplicitLod:
     case IR::Opcode::BoundImageSampleExplicitLod:
@@ -108,6 +152,17 @@ bool IsBindless(const IR::Inst& inst) {
     case IR::Opcode::BoundImageGradient:
     case IR::Opcode::BoundImageRead:
     case IR::Opcode::BoundImageWrite:
+    case IR::Opcode::BoundImageAtomicIAdd32:
+    case IR::Opcode::BoundImageAtomicSMin32:
+    case IR::Opcode::BoundImageAtomicUMin32:
+    case IR::Opcode::BoundImageAtomicSMax32:
+    case IR::Opcode::BoundImageAtomicUMax32:
+    case IR::Opcode::BoundImageAtomicInc32:
+    case IR::Opcode::BoundImageAtomicDec32:
+    case IR::Opcode::BoundImageAtomicAnd32:
+    case IR::Opcode::BoundImageAtomicOr32:
+    case IR::Opcode::BoundImageAtomicXor32:
+    case IR::Opcode::BoundImageAtomicExchange32:
         return false;
     default:
         throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
@@ -359,11 +414,22 @@ void TexturePass(Environment& env, IR::Program& program) {
         u32 index;
         switch (inst->GetOpcode()) {
         case IR::Opcode::ImageRead:
+        case IR::Opcode::ImageAtomicIAdd32:
+        case IR::Opcode::ImageAtomicSMin32:
+        case IR::Opcode::ImageAtomicUMin32:
+        case IR::Opcode::ImageAtomicSMax32:
+        case IR::Opcode::ImageAtomicUMax32:
+        case IR::Opcode::ImageAtomicInc32:
+        case IR::Opcode::ImageAtomicDec32:
+        case IR::Opcode::ImageAtomicAnd32:
+        case IR::Opcode::ImageAtomicOr32:
+        case IR::Opcode::ImageAtomicXor32:
+        case IR::Opcode::ImageAtomicExchange32:
         case IR::Opcode::ImageWrite: {
             if (cbuf.has_secondary) {
                 throw NotImplementedException("Unexpected separate sampler");
             }
-            const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite};
+            const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
             if (flags.type == TextureType::Buffer) {
                 index = descriptors.Add(ImageBufferDescriptor{
                     .format = flags.image_format,
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index cb1969b3a1..2f6adf714e 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -160,6 +160,7 @@ struct Info {
     bool uses_derivatives{};
     bool uses_typeless_image_reads{};
     bool uses_typeless_image_writes{};
+    bool uses_image_buffers{};
     bool uses_shared_increment{};
     bool uses_shared_decrement{};
     bool uses_global_increment{};
@@ -173,6 +174,7 @@ struct Info {
     bool uses_atomic_f32x2_max{};
     bool uses_int64_bit_atomics{};
     bool uses_global_memory{};
+    bool uses_atomic_image_u32{};
 
     IR::Type used_constant_buffer_types{};
     IR::Type used_storage_buffer_types{};
-- 
cgit v1.2.3-70-g09d2


From d54d7de40e7295827b0e4e4026441b53d3fc9569 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 14 May 2021 00:40:54 -0300
Subject: glasm: Rework control flow introducing a syntax list

This commit regresses VertexA shaders; their transformation pass has to
be adapted to the new control flow.
---
 src/shader_recompiler/CMakeLists.txt               |   1 +
 src/shader_recompiler/backend/glasm/emit_glasm.cpp |  41 +++-
 .../backend/glasm/emit_glasm_instructions.h        |   7 +-
 .../backend/glasm/emit_glasm_integer.cpp           |   9 +-
 .../backend/glasm/emit_glasm_not_implemented.cpp   |  24 +--
 .../backend/spirv/emit_context.cpp                 |   9 -
 src/shader_recompiler/backend/spirv/emit_context.h |   1 -
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  71 ++++++-
 .../backend/spirv/emit_spirv_control_flow.cpp      |  33 +--
 .../backend/spirv/emit_spirv_instructions.h        |   9 +-
 .../frontend/ir/abstract_syntax_list.h             |  56 +++++
 src/shader_recompiler/frontend/ir/basic_block.cpp  |  56 ++---
 src/shader_recompiler/frontend/ir/basic_block.h    |  51 +----
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  60 ++----
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  11 +-
 .../frontend/ir/microinstruction.cpp               |  11 +-
 src/shader_recompiler/frontend/ir/opcodes.h        |   1 -
 src/shader_recompiler/frontend/ir/opcodes.inc      |  21 +-
 src/shader_recompiler/frontend/ir/post_order.cpp   |  36 ++--
 src/shader_recompiler/frontend/ir/post_order.h     |   3 +-
 src/shader_recompiler/frontend/ir/program.h        |   4 +-
 src/shader_recompiler/frontend/ir/type.h           |  49 +++--
 src/shader_recompiler/frontend/ir/value.cpp        |   8 -
 src/shader_recompiler/frontend/ir/value.h          |   9 -
 src/shader_recompiler/frontend/maxwell/program.cpp |  24 ++-
 .../frontend/maxwell/structured_control_flow.cpp   | 235 ++++++++++++---------
 .../frontend/maxwell/structured_control_flow.h     |  12 +-
 .../frontend/maxwell/translate/translate.cpp       |   7 +-
 .../frontend/maxwell/translate/translate.h         |   2 +-
 .../ir_opt/constant_propagation_pass.cpp           |  20 --
 src/shader_recompiler/ir_opt/dual_vertex_pass.cpp  |  56 +----
 .../ir_opt/identity_removal_pass.cpp               |   1 -
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |   4 +-
 33 files changed, 437 insertions(+), 505 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/ir/abstract_syntax_list.h

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index f829b8d32c..0d55924a7c 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -51,6 +51,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_warp.cpp
     environment.h
     exception.h
+    frontend/ir/abstract_syntax_list.h
     frontend/ir/attribute.cpp
     frontend/ir/attribute.h
     frontend/ir/basic_block.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 056d8cbf8d..51ca83d189 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -117,8 +117,6 @@ auto Arg(EmitContext& ctx, const IR::Value& arg) {
         return Identity<const IR::Value&>{arg};
     } else if constexpr (std::is_same_v<ArgType, u32>) {
         return Identity{arg.U32()};
-    } else if constexpr (std::is_same_v<ArgType, IR::Block*>) {
-        return Identity{arg.Label()};
     } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
         return Identity{arg.Attribute()};
     } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
@@ -177,6 +175,39 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) {
     throw LogicError("Invalid opcode {}", inst->GetOpcode());
 }
 
+void EmitCode(EmitContext& ctx, const IR::Program& program) {
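+    // eval materializes a U1 condition as a signed scalar that the CC moves below can test.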
+    const auto eval{
+        [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
+    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+        switch (node.type) {
+        case IR::AbstractSyntaxNode::Type::Block:
+            for (IR::Inst& inst : node.block->Instructions()) {
+                EmitInst(ctx, &inst);
+            }
+            break;
+        case IR::AbstractSyntaxNode::Type::If:
+            ctx.Add("MOV.S.CC RC,{};IF NE.x;", eval(node.if_node.cond));
+            break;
+        case IR::AbstractSyntaxNode::Type::EndIf:
+            ctx.Add("ENDIF;");
+            break;
+        case IR::AbstractSyntaxNode::Type::Loop:
+            ctx.Add("REP;");
+            break;
+        case IR::AbstractSyntaxNode::Type::Repeat:
+            ctx.Add("MOV.S.CC RC,{};BRK NE.x;ENDREP;", eval(node.repeat.cond));
+            break;
+        case IR::AbstractSyntaxNode::Type::Break:
+            ctx.Add("MOV.S.CC RC,{};BRK NE.x;", eval(node.repeat.cond));
+            break;
+        case IR::AbstractSyntaxNode::Type::Return:
+        case IR::AbstractSyntaxNode::Type::Unreachable:
+            ctx.Add("RET;");
+            break;
+        }
+    }
+}
+
 void SetupOptions(std::string& header, Info info) {
     if (info.uses_int64_bit_atomics) {
         header += "OPTION NV_shader_atomic_int64;";
@@ -201,11 +232,7 @@ void SetupOptions(std::string& header, Info info) {
 
 std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) {
     EmitContext ctx{program};
-    for (IR::Block* const block : program.blocks) {
-        for (IR::Inst& inst : block->Instructions()) {
-            EmitInst(ctx, &inst);
-        }
-    }
+    EmitCode(ctx, program);
     std::string header = "!!NVcp5.0\n"
                          "OPTION NV_internal;";
     SetupOptions(header, program.info);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 8202354fea..0f7f16e6e2 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -22,13 +22,8 @@ class EmitContext;
 void EmitPhi(EmitContext& ctx, IR::Inst& inst);
 void EmitVoid(EmitContext& ctx);
 void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
-void EmitBranch(EmitContext& ctx);
-void EmitBranchConditional(EmitContext& ctx);
-void EmitLoopMerge(EmitContext& ctx);
-void EmitSelectionMerge(EmitContext& ctx);
-void EmitReturn(EmitContext& ctx);
+void EmitBranchConditionRef(EmitContext&);
 void EmitJoin(EmitContext& ctx);
-void EmitUnreachable(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx);
 void EmitBarrier(EmitContext& ctx);
 void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index 15fd233560..adcc0404b1 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -91,7 +91,8 @@ void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scalar
     if (count.type != Type::Register && offset.type != Type::Register) {
         ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base);
     } else {
-        ctx.Add("MOV.S RC.x,{};MOV.U RC.y,{};"
+        ctx.Add("MOV.S RC.x,{};"
+                "MOV.S RC.y,{};"
                 "BFI.S {},RC,{},{};",
                 count, offset, ret, insert, base);
     }
@@ -103,7 +104,8 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal
     if (count.type != Type::Register && offset.type != Type::Register) {
         ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base);
     } else {
-        ctx.Add("MOV.S RC.x,{};MOV.U RC.y,{};"
+        ctx.Add("MOV.S RC.x,{};"
+                "MOV.S RC.y,{};"
                 "BFE.S {},RC,{};",
                 count, offset, ret, base);
     }
@@ -115,7 +117,8 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal
     if (count.type != Type::Register && offset.type != Type::Register) {
         ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
     } else {
-        ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};"
+        ctx.Add("MOV.U RC.x,{};"
+                "MOV.U RC.y,{};"
                 "BFE.U {},RC,{};",
                 count, offset, ret, base);
     }
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
index b40d09f8c1..f37ad55879 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -23,34 +23,12 @@ void EmitPhi(EmitContext& ctx, IR::Inst& inst) {
 
 void EmitVoid(EmitContext&) {}
 
-void EmitBranch(EmitContext& ctx) {
-    NotImplemented();
-}
-
-void EmitBranchConditional(EmitContext& ctx) {
-    NotImplemented();
-}
-
-void EmitLoopMerge(EmitContext& ctx) {
-    NotImplemented();
-}
-
-void EmitSelectionMerge(EmitContext& ctx) {
-    NotImplemented();
-}
-
-void EmitReturn(EmitContext& ctx) {
-    ctx.Add("RET;");
-}
+void EmitBranchConditionRef(EmitContext&) {}
 
 void EmitJoin(EmitContext& ctx) {
     NotImplemented();
 }
 
-void EmitUnreachable(EmitContext& ctx) {
-    NotImplemented();
-}
-
 void EmitDemoteToHelperInvocation(EmitContext& ctx) {
     NotImplemented();
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 9759591bdd..a98e08392c 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -463,7 +463,6 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings
     DefineImages(program.info, image_binding);
     DefineAttributeMemAccess(program.info);
     DefineGlobalMemoryFunctions(program.info);
-    DefineLabels(program);
 }
 
 EmitContext::~EmitContext() = default;
@@ -487,8 +486,6 @@ Id EmitContext::Def(const IR::Value& value) {
         return Const(value.F32());
     case IR::Type::F64:
         return Constant(F64[1], value.F64());
-    case IR::Type::Label:
-        return value.Label()->Definition<Id>();
     default:
         throw NotImplementedException("Immediate type {}", value.Type());
     }
@@ -1139,12 +1136,6 @@ void EmitContext::DefineImages(const Info& info, u32& binding) {
     }
 }
 
-void EmitContext::DefineLabels(IR::Program& program) {
-    for (IR::Block* const block : program.blocks) {
-        block->SetDefinition(OpLabel());
-    }
-}
-
 void EmitContext::DefineInputs(const Info& info) {
     if (info.uses_workgroup_id) {
         workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 8b000f1ec8..d2b79f6c17 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -296,7 +296,6 @@ private:
     void DefineImages(const Info& info, u32& binding);
     void DefineAttributeMemAccess(const Info& info);
     void DefineGlobalMemoryFunctions(const Info& info);
-    void DefineLabels(IR::Program& program);
 
     void DefineInputs(const Info& info);
     void DefineOutputs(const IR::Program& program);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 2dad87e872..c22edfec2a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -41,8 +41,6 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
         return arg;
     } else if constexpr (std::is_same_v<ArgType, u32>) {
         return arg.U32();
-    } else if constexpr (std::is_same_v<ArgType, IR::Block*>) {
-        return arg.Label();
     } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
         return arg.Attribute();
     } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
@@ -109,15 +107,74 @@ Id TypeId(const EmitContext& ctx, IR::Type type) {
     }
 }
 
+void Traverse(EmitContext& ctx, IR::Program& program) {
+    IR::Block* current_block{};
+    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+        switch (node.type) {
+        case IR::AbstractSyntaxNode::Type::Block: {
+            const Id label{node.block->Definition<Id>()};
+            if (current_block) {
+                ctx.OpBranch(label);
+            }
+            current_block = node.block;
+            ctx.AddLabel(label);
+            for (IR::Inst& inst : node.block->Instructions()) {
+                EmitInst(ctx, &inst);
+            }
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::If: {
+            const Id if_label{node.if_node.body->Definition<Id>()};
+            const Id endif_label{node.if_node.merge->Definition<Id>()};
+            ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
+            ctx.OpBranchConditional(ctx.Def(node.if_node.cond), if_label, endif_label);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Loop: {
+            const Id body_label{node.loop.body->Definition<Id>()};
+            const Id continue_label{node.loop.continue_block->Definition<Id>()};
+            const Id endloop_label{node.loop.merge->Definition<Id>()};
+
+            ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
+            ctx.OpBranch(body_label);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Break: {
+            const Id break_label{node.break_node.merge->Definition<Id>()};
+            const Id skip_label{node.break_node.skip->Definition<Id>()};
+            ctx.OpBranchConditional(ctx.Def(node.break_node.cond), break_label, skip_label);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::EndIf:
+            if (current_block) {
+                ctx.OpBranch(node.end_if.merge->Definition<Id>());
+            }
+            break;
+        case IR::AbstractSyntaxNode::Type::Repeat: {
+            const Id loop_header_label{node.repeat.loop_header->Definition<Id>()};
+            const Id merge_label{node.repeat.merge->Definition<Id>()};
+            ctx.OpBranchConditional(ctx.Def(node.repeat.cond), loop_header_label, merge_label);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Return:
+            ctx.OpReturn();
+            break;
+        case IR::AbstractSyntaxNode::Type::Unreachable:
+            ctx.OpUnreachable();
+            break;
+        }
+        if (node.type != IR::AbstractSyntaxNode::Type::Block) {
+            current_block = nullptr;
+        }
+    }
+}
+
 Id DefineMain(EmitContext& ctx, IR::Program& program) {
     const Id void_function{ctx.TypeFunction(ctx.void_id)};
     const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
     for (IR::Block* const block : program.blocks) {
-        ctx.AddLabel(block->Definition<Id>());
-        for (IR::Inst& inst : block->Instructions()) {
-            EmitInst(ctx, &inst);
-        }
+        block->SetDefinition(ctx.OpLabel());
     }
+    Traverse(ctx, program);
     ctx.OpFunctionEnd();
     return main;
 }
@@ -411,6 +468,8 @@ Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
     return id;
 }
 
+void EmitBranchConditionRef(EmitContext&) {}
+
 void EmitGetZeroFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 6154c46be4..d33486f282 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -7,40 +7,21 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitBranch(EmitContext& ctx, Id label) {
-    ctx.OpBranch(label);
-}
-
-void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) {
-    ctx.OpBranchConditional(condition, true_label, false_label);
-}
-
-void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) {
-    ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
-}
-
-void EmitSelectionMerge(EmitContext& ctx, Id merge_label) {
-    ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
-}
-
-void EmitReturn(EmitContext& ctx) {
-    ctx.OpReturn();
-}
-
 void EmitJoin(EmitContext&) {
     throw NotImplementedException("Join shouldn't be emitted");
 }
 
-void EmitUnreachable(EmitContext& ctx) {
-    ctx.OpUnreachable();
-}
-
-void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) {
+void EmitDemoteToHelperInvocation(EmitContext& ctx) {
     if (ctx.profile.support_demote_to_helper_invocation) {
         ctx.OpDemoteToHelperInvocationEXT();
-        ctx.OpBranch(continue_label);
     } else {
+        const Id kill_label{ctx.OpLabel()};
+        const Id impossible_label{ctx.OpLabel()};
+        ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone);
+        ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label);
+        ctx.AddLabel(kill_label);
         ctx.OpKill();
+        ctx.AddLabel(impossible_label);
     }
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index a1ca3f43de..2f4f6e59ed 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -23,14 +23,9 @@ class EmitContext;
 Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
 void EmitVoid(EmitContext& ctx);
 Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
-void EmitBranch(EmitContext& ctx, Id label);
-void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label);
-void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
-void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
-void EmitReturn(EmitContext& ctx);
+void EmitBranchConditionRef(EmitContext&);
 void EmitJoin(EmitContext& ctx);
-void EmitUnreachable(EmitContext& ctx);
-void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
+void EmitDemoteToHelperInvocation(EmitContext& ctx);
 void EmitBarrier(EmitContext& ctx);
 void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
 void EmitDeviceMemoryBarrier(EmitContext& ctx);
diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
new file mode 100644
index 0000000000..1366414c25
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
@@ -0,0 +1,56 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+class Block;
+
+struct AbstractSyntaxNode {
+    enum class Type {
+        Block,
+        If,
+        EndIf,
+        Loop,
+        Repeat,
+        Break,
+        Return,
+        Unreachable,
+    };
+    Type type{};
+    union {
+        Block* block{};
+        struct {
+            U1 cond;
+            Block* body;
+            Block* merge;
+        } if_node;
+        struct {
+            Block* merge;
+        } end_if;
+        struct {
+            Block* body;
+            Block* continue_block;
+            Block* merge;
+        } loop;
+        struct {
+            U1 cond;
+            Block* loop_header;
+            Block* merge;
+        } repeat;
+        struct {
+            U1 cond;
+            Block* merge;
+            Block* skip;
+        } break_node;
+    };
+};
+using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
+
+} // namespace Shader::IR
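
Consumers walk the list linearly and switch on node.type; only the union member
matching the node's type is valid to read. Both backends in this commit follow
the same shape, sketched here with a hypothetical visitor:

    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
        switch (node.type) {
        case IR::AbstractSyntaxNode::Type::Block:
            VisitBlock(node.block);  // node.block is the active member here
            break;
        default:
            break;  // If/EndIf/Loop/Repeat/Break/Return/Unreachable
        }
    }
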
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index f92fc2571c..7c08b25ce2 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -14,10 +14,7 @@
 
 namespace Shader::IR {
 
-Block::Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end)
-    : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {}
-
-Block::Block(ObjectPool<Inst>& inst_pool_) : Block{inst_pool_, 0, 0} {}
+Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
 
 Block::~Block() = default;
 
@@ -40,39 +37,15 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
     return result_it;
 }
 
-void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) {
-    branch_cond = cond;
-    branch_true = branch_true_;
-    branch_false = branch_false_;
-}
-
-void Block::SetBranch(Block* branch) {
-    branch_cond = Condition{true};
-    branch_true = branch;
-}
-
-void Block::SetReturn() {
-    branch_cond = Condition{true};
-    branch_true = nullptr;
-    branch_false = nullptr;
-}
-
-bool Block::IsVirtual() const noexcept {
-    return location_begin == location_end;
-}
-
-u32 Block::LocationBegin() const noexcept {
-    return location_begin;
-}
-
-u32 Block::LocationEnd() const noexcept {
-    return location_end;
-}
-
-void Block::AddImmediatePredecessor(Block* block) {
-    if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) {
-        imm_predecessors.push_back(block);
+void Block::AddBranch(Block* block) {
+    if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
+        throw LogicError("Successor already inserted");
+    }
+    if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
+        throw LogicError("Predecessor already inserted");
     }
+    imm_successors.push_back(block);
+    block->imm_predecessors.push_back(this);
 }
 
 static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
@@ -92,15 +65,11 @@ static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& in
     return it->second;
 }
 
-static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index,
-                              std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
+static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
                               const Value& arg) {
     if (arg.IsEmpty()) {
         return "<null>";
     }
-    if (arg.IsLabel()) {
-        return BlockToIndex(block_to_index, arg.Label());
-    }
     if (!arg.IsImmediate() || arg.IsIdentity()) {
         return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
     }
@@ -140,8 +109,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
     if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
         ret += fmt::format(" ${}", it->second);
     }
-    ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd());
-
+    ret += '\n';
     for (const Inst& inst : block) {
         const Opcode op{inst.GetOpcode()};
         ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
@@ -153,7 +121,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
         const size_t arg_count{inst.NumArgs()};
         for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
             const Value arg{inst.Arg(arg_index)};
-            const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)};
+            const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)};
             ret += arg_index != 0 ? ", " : " ";
             if (op == Opcode::Phi) {
                 ret += fmt::format("[ {}, {} ]", arg_str,
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index 0b0c97af6b..7e134b4c78 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -12,6 +12,7 @@
 #include <boost/intrusive/list.hpp>
 
 #include "common/bit_cast.h"
+#include "common/common_types.h"
 #include "shader_recompiler/frontend/ir/condition.h"
 #include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/object_pool.h"
@@ -27,7 +28,6 @@ public:
     using reverse_iterator = InstructionList::reverse_iterator;
     using const_reverse_iterator = InstructionList::const_reverse_iterator;
 
-    explicit Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end);
     explicit Block(ObjectPool<Inst>& inst_pool_);
     ~Block();
 
@@ -44,22 +44,8 @@ public:
     iterator PrependNewInst(iterator insertion_point, Opcode op,
                             std::initializer_list<Value> args = {}, u32 flags = 0);
 
-    /// Set the branches to jump to when all instructions have executed.
-    void SetBranches(Condition cond, Block* branch_true, Block* branch_false);
-    /// Set the branch to unconditionally jump to when all instructions have executed.
-    void SetBranch(Block* branch);
-    /// Mark the block as a return block.
-    void SetReturn();
-
-    /// Returns true when the block does not implement any guest instructions directly.
-    [[nodiscard]] bool IsVirtual() const noexcept;
-    /// Gets the starting location of this basic block.
-    [[nodiscard]] u32 LocationBegin() const noexcept;
-    /// Gets the end location for this basic block.
-    [[nodiscard]] u32 LocationEnd() const noexcept;
-
-    /// Adds a new immediate predecessor to this basic block.
-    void AddImmediatePredecessor(Block* block);
+    /// Adds a new branch to this basic block.
+    void AddBranch(Block* block);
 
     /// Gets a mutable reference to the instruction list for this basic block.
     [[nodiscard]] InstructionList& Instructions() noexcept {
@@ -71,9 +57,13 @@ public:
     }
 
     /// Gets an immutable span to the immediate predecessors.
-    [[nodiscard]] std::span<Block* const> ImmediatePredecessors() const noexcept {
+    [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept {
         return imm_predecessors;
     }
+    /// Gets an immutable span to the immediate successors.
+    [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
+        return imm_successors;
+    }
 
     /// Intrusively store the host definition of this instruction.
     template <typename DefinitionType>
@@ -87,19 +77,6 @@ public:
         return Common::BitCast<DefinitionType>(definition);
     }
 
-    [[nodiscard]] Condition BranchCondition() const noexcept {
-        return branch_cond;
-    }
-    [[nodiscard]] bool IsTerminationBlock() const noexcept {
-        return !branch_true && !branch_false;
-    }
-    [[nodiscard]] Block* TrueBranch() const noexcept {
-        return branch_true;
-    }
-    [[nodiscard]] Block* FalseBranch() const noexcept {
-        return branch_false;
-    }
-
     void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept {
         ssa_reg_values[RegIndex(reg)] = value;
     }
@@ -178,22 +155,14 @@ public:
 private:
     /// Memory pool for instruction list
     ObjectPool<Inst>* inst_pool;
-    /// Starting location of this block
-    u32 location_begin;
-    /// End location of this block
-    u32 location_end;
 
     /// List of instructions in this block
     InstructionList instructions;
 
-    /// Condition to choose the branch to take
-    Condition branch_cond{true};
-    /// Block to jump into when the branch condition evaluates as true
-    Block* branch_true{nullptr};
-    /// Block to jump into when the branch condition evaluates as false
-    Block* branch_false{nullptr};
     /// Block immediate predecessors
     std::vector<Block*> imm_predecessors;
+    /// Block immediate successors
+    std::vector<Block*> imm_successors;
 
     /// Intrusively store the value of a register in the block.
     std::array<Value, NUM_REGS> ssa_reg_values;
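
AddBranch records a CFG edge on both endpoints, replacing the one-sided
AddImmediatePredecessor. Wiring a conditional during control-flow construction
would look like this (block pointers assumed to come from the object pool):

    current_block->AddBranch(true_block);
    current_block->AddBranch(false_block);
    // Both sides stay in sync: current_block->ImmSuccessors() now holds the two
    // targets, and each target's ImmPredecessors() holds current_block.
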
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index ce6c9af073..eb45aa4772 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -61,25 +61,28 @@ F64 IREmitter::Imm64(f64 value) const {
     return F64{Value{value}};
 }
 
-void IREmitter::Branch(Block* label) {
-    label->AddImmediatePredecessor(block);
-    block->SetBranch(label);
-    Inst(Opcode::Branch, label);
+void IREmitter::Prologue() {
+    Inst(Opcode::Prologue);
 }
 
-void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) {
-    block->SetBranches(IR::Condition{true}, true_label, false_label);
-    true_label->AddImmediatePredecessor(block);
-    false_label->AddImmediatePredecessor(block);
-    Inst(Opcode::BranchConditional, condition, true_label, false_label);
+void IREmitter::Epilogue() {
+    Inst(Opcode::Epilogue);
 }
 
-void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) {
-    Inst(Opcode::LoopMerge, merge_block, continue_target);
+void IREmitter::BranchConditionRef(const U1& cond) {
+    Inst(Opcode::BranchConditionRef, cond);
 }
 
-void IREmitter::SelectionMerge(Block* merge_block) {
-    Inst(Opcode::SelectionMerge, merge_block);
+void IREmitter::DemoteToHelperInvocation() {
+    Inst(Opcode::DemoteToHelperInvocation);
+}
+
+void IREmitter::EmitVertex(const U32& stream) {
+    Inst(Opcode::EmitVertex, stream);
+}
+
+void IREmitter::EndPrimitive(const U32& stream) {
+    Inst(Opcode::EndPrimitive, stream);
 }
 
 void IREmitter::Barrier() {
@@ -94,37 +97,6 @@ void IREmitter::DeviceMemoryBarrier() {
     Inst(Opcode::DeviceMemoryBarrier);
 }
 
-void IREmitter::Return() {
-    block->SetReturn();
-    Inst(Opcode::Return);
-}
-
-void IREmitter::Unreachable() {
-    Inst(Opcode::Unreachable);
-}
-
-void IREmitter::DemoteToHelperInvocation(Block* continue_label) {
-    block->SetBranch(continue_label);
-    continue_label->AddImmediatePredecessor(block);
-    Inst(Opcode::DemoteToHelperInvocation, continue_label);
-}
-
-void IREmitter::Prologue() {
-    Inst(Opcode::Prologue);
-}
-
-void IREmitter::Epilogue() {
-    Inst(Opcode::Epilogue);
-}
-
-void IREmitter::EmitVertex(const U32& stream) {
-    Inst(Opcode::EmitVertex, stream);
-}
-
-void IREmitter::EndPrimitive(const U32& stream) {
-    Inst(Opcode::EndPrimitive, stream);
-}
-
 U32 IREmitter::GetReg(IR::Reg reg) {
     return Inst<U32>(Opcode::GetRegister, reg);
 }
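
With labels removed from the instruction stream, a branch condition has no IR uses of its own; BranchConditionRef appears to exist purely so the value is still referenced (note that it is listed under MayHaveSideEffects below) and survives dead code elimination until the backend reads it from the abstract syntax list. The emitter is used like this in the structured control flow pass further down:

    IR::IREmitter ir{*current_block};
    const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
    ir.BranchConditionRef(cond); // reference only; the actual edges are
                                 // recorded with current_block->AddBranch(...)
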
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index fd41b7e893..7a83c33d33 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -32,17 +32,10 @@ public:
     [[nodiscard]] U64 Imm64(s64 value) const;
     [[nodiscard]] F64 Imm64(f64 value) const;
 
-    void Branch(Block* label);
-    void BranchConditional(const U1& condition, Block* true_label, Block* false_label);
-    void LoopMerge(Block* merge_block, Block* continue_target);
-    void SelectionMerge(Block* merge_block);
-    void Return();
-    void Unreachable();
-    void DemoteToHelperInvocation(Block* continue_label);
-
     void Prologue();
     void Epilogue();
-
+    void BranchConditionRef(const U1& cond);
+    void DemoteToHelperInvocation();
     void EmitVertex(const U32& stream);
     void EndPrimitive(const U32& stream);
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 616ef17d4e..3645742401 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -56,19 +56,14 @@ Inst::~Inst() {
 
 bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
-    case Opcode::Branch:
-    case Opcode::BranchConditional:
-    case Opcode::LoopMerge:
-    case Opcode::SelectionMerge:
-    case Opcode::Return:
+    case Opcode::Prologue:
+    case Opcode::Epilogue:
+    case Opcode::BranchConditionRef:
     case Opcode::Join:
-    case Opcode::Unreachable:
     case Opcode::DemoteToHelperInvocation:
     case Opcode::Barrier:
     case Opcode::WorkgroupMemoryBarrier:
     case Opcode::DeviceMemoryBarrier:
-    case Opcode::Prologue:
-    case Opcode::Epilogue:
     case Opcode::EmitVertex:
     case Opcode::EndPrimitive:
     case Opcode::SetAttribute:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
index 2b9c0ed8cc..56b001902c 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.h
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -30,7 +30,6 @@ struct OpcodeMeta {
 // using enum Type;
 constexpr Type Void{Type::Void};
 constexpr Type Opaque{Type::Opaque};
-constexpr Type Label{Type::Label};
 constexpr Type Reg{Type::Reg};
 constexpr Type Pred{Type::Pred};
 constexpr Type Attribute{Type::Attribute};
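
opcodes.inc below is an X-macro table: every row lists the opcode name, its result type, and then the argument types. Consumers define OPCODE before including the file; the Opcode enum in opcodes.h is presumably produced along these lines (sketch, the actual consumer is not in this diff):

    enum class Opcode {
    #define OPCODE(name, result_type, ...) name,
    #include "shader_recompiler/frontend/ir/opcodes.inc"
    #undef OPCODE
    };
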
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 9165421f89..75ddb6b6f8 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -7,27 +7,20 @@ OPCODE(Phi,                                                 Opaque,
 OPCODE(Identity,                                            Opaque,         Opaque,                                                                         )
 OPCODE(Void,                                                Void,                                                                                           )
 
-// Control flow
-OPCODE(Branch,                                              Void,           Label,                                                                          )
-OPCODE(BranchConditional,                                   Void,           U1,             Label,          Label,                                          )
-OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          )
-OPCODE(SelectionMerge,                                      Void,           Label,                                                                          )
-OPCODE(Return,                                              Void,                                                                                           )
+// Special operations
+OPCODE(Prologue,                                            Void,                                                                                           )
+OPCODE(Epilogue,                                            Void,                                                                                           )
+OPCODE(BranchConditionRef,                                  Void,           U1,                                                                             )
 OPCODE(Join,                                                Void,                                                                                           )
-OPCODE(Unreachable,                                         Void,                                                                                           )
-OPCODE(DemoteToHelperInvocation,                            Void,           Label,                                                                          )
+OPCODE(DemoteToHelperInvocation,                            Void,                                                                                           )
+OPCODE(EmitVertex,                                          Void,           U32,                                                                            )
+OPCODE(EndPrimitive,                                        Void,           U32,                                                                            )
 
 // Barriers
 OPCODE(Barrier,                                             Void,                                                                                           )
 OPCODE(WorkgroupMemoryBarrier,                              Void,                                                                                           )
 OPCODE(DeviceMemoryBarrier,                                 Void,                                                                                           )
 
-// Special operations
-OPCODE(Prologue,                                            Void,                                                                                           )
-OPCODE(Epilogue,                                            Void,                                                                                           )
-OPCODE(EmitVertex,                                          Void,           U32,                                                                            )
-OPCODE(EndPrimitive,                                        Void,           U32,                                                                            )
-
 // Context getters/setters
 OPCODE(GetRegister,                                         U32,            Reg,                                                                            )
 OPCODE(SetRegister,                                         Void,           Reg,            U32,                                                            )
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp
index 8709a2ea1e..1a28df7fbe 100644
--- a/src/shader_recompiler/frontend/ir/post_order.cpp
+++ b/src/shader_recompiler/frontend/ir/post_order.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+
 #include <boost/container/flat_set.hpp>
 #include <boost/container/small_vector.hpp>
 
@@ -10,35 +12,31 @@
 
 namespace Shader::IR {
 
-BlockList PostOrder(const BlockList& blocks) {
+BlockList PostOrder(const AbstractSyntaxNode& root) {
     boost::container::small_vector<Block*, 16> block_stack;
     boost::container::flat_set<Block*> visited;
-
     BlockList post_order_blocks;
-    post_order_blocks.reserve(blocks.size());
 
-    Block* const first_block{blocks.front()};
+    if (root.type != AbstractSyntaxNode::Type::Block) {
+        throw LogicError("First node in abstract syntax list root is not a block");
+    }
+    Block* const first_block{root.block};
     visited.insert(first_block);
     block_stack.push_back(first_block);
 
-    const auto visit_branch = [&](Block* block, Block* branch) {
-        if (!branch) {
-            return false;
-        }
-        if (!visited.insert(branch).second) {
-            return false;
-        }
-        // Calling push_back twice is faster than insert on MSVC
-        block_stack.push_back(block);
-        block_stack.push_back(branch);
-        return true;
-    };
     while (!block_stack.empty()) {
         Block* const block{block_stack.back()};
+        const auto visit{[&](Block* branch) {
+            if (!visited.insert(branch).second) {
+                return false;
+            }
+            // Calling push_back twice is faster than insert on MSVC
+            block_stack.push_back(block);
+            block_stack.push_back(branch);
+            return true;
+        }};
         block_stack.pop_back();
-
-        if (!visit_branch(block, block->TrueBranch()) &&
-            !visit_branch(block, block->FalseBranch())) {
+        if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
             post_order_blocks.push_back(block);
         }
     }
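
The rewritten traversal, restated in a self-contained form: it is an iterative post-order DFS in which a block is emitted once every immediate successor has been visited, and pushing the parent back onto the stack before the child stands in for an explicit recursion stack (same containers and includes as post_order.cpp above):

    BlockList PostOrder(Block* entry) {
        boost::container::small_vector<Block*, 16> block_stack;
        boost::container::flat_set<Block*> visited;
        BlockList post_order_blocks;
        visited.insert(entry);
        block_stack.push_back(entry);
        while (!block_stack.empty()) {
            Block* const block{block_stack.back()};
            block_stack.pop_back();
            const auto visit{[&](Block* succ) {
                if (!visited.insert(succ).second) {
                    return false; // already seen, try the next successor
                }
                block_stack.push_back(block); // revisit parent after child
                block_stack.push_back(succ);
                return true;
            }};
            if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
                post_order_blocks.push_back(block); // all successors done
            }
        }
        return post_order_blocks;
    }
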
diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h
index 30137ff57a..58a0467a03 100644
--- a/src/shader_recompiler/frontend/ir/post_order.h
+++ b/src/shader_recompiler/frontend/ir/post_order.h
@@ -5,9 +5,10 @@
 #pragma once
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
 
 namespace Shader::IR {
 
-BlockList PostOrder(const BlockList& blocks);
+BlockList PostOrder(const AbstractSyntaxNode& root);
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index 51e1a8c779..9ede5b48d9 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -7,8 +7,7 @@
 #include <array>
 #include <string>
 
-#include <boost/container/small_vector.hpp>
-
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/program_header.h"
 #include "shader_recompiler/shader_info.h"
@@ -17,6 +16,7 @@
 namespace Shader::IR {
 
 struct Program {
+    AbstractSyntaxList syntax_list;
     BlockList blocks;
     BlockList post_order_blocks;
     Info info;
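
abstract_syntax_list.h is introduced by this commit but does not appear in this diff. From its uses here (node.type, node.block, node.if_node.cond, node.repeat.loop_header, and so on) the node layout is approximately the following; treat it as a reconstruction rather than the authoritative header:

    struct AbstractSyntaxNode {
        enum class Type { Block, If, EndIf, Loop, Repeat, Break, Return, Unreachable };
        Type type{};
        union {
            Block* block{}; // valid when type == Type::Block
            struct { U1 cond; Block* body; Block* merge; } if_node;
            struct { Block* merge; } end_if;
            struct { Block* body; Block* continue_block; Block* merge; } loop;
            struct { U1 cond; Block* loop_header; Block* merge; } repeat;
            struct { U1 cond; Block* merge; Block* skip; } break_node;
        };
    };
    using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
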
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
index 8b3b338528..294b230c49 100644
--- a/src/shader_recompiler/frontend/ir/type.h
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -16,31 +16,30 @@ namespace Shader::IR {
 enum class Type {
     Void = 0,
     Opaque = 1 << 0,
-    Label = 1 << 1,
-    Reg = 1 << 2,
-    Pred = 1 << 3,
-    Attribute = 1 << 4,
-    Patch = 1 << 5,
-    U1 = 1 << 6,
-    U8 = 1 << 7,
-    U16 = 1 << 8,
-    U32 = 1 << 9,
-    U64 = 1 << 10,
-    F16 = 1 << 11,
-    F32 = 1 << 12,
-    F64 = 1 << 13,
-    U32x2 = 1 << 14,
-    U32x3 = 1 << 15,
-    U32x4 = 1 << 16,
-    F16x2 = 1 << 17,
-    F16x3 = 1 << 18,
-    F16x4 = 1 << 19,
-    F32x2 = 1 << 20,
-    F32x3 = 1 << 21,
-    F32x4 = 1 << 22,
-    F64x2 = 1 << 23,
-    F64x3 = 1 << 24,
-    F64x4 = 1 << 25,
+    Reg = 1 << 1,
+    Pred = 1 << 2,
+    Attribute = 1 << 3,
+    Patch = 1 << 4,
+    U1 = 1 << 5,
+    U8 = 1 << 6,
+    U16 = 1 << 7,
+    U32 = 1 << 8,
+    U64 = 1 << 9,
+    F16 = 1 << 10,
+    F32 = 1 << 11,
+    F64 = 1 << 12,
+    U32x2 = 1 << 13,
+    U32x3 = 1 << 14,
+    U32x4 = 1 << 15,
+    F16x2 = 1 << 16,
+    F16x3 = 1 << 17,
+    F16x4 = 1 << 18,
+    F32x2 = 1 << 19,
+    F32x3 = 1 << 20,
+    F32x4 = 1 << 21,
+    F64x2 = 1 << 22,
+    F64x3 = 1 << 23,
+    F64x4 = 1 << 24,
 };
 DECLARE_ENUM_FLAG_OPERATORS(Type)
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index b962f170d7..d365ea1bcb 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -9,8 +9,6 @@ namespace Shader::IR {
 
 Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {}
 
-Value::Value(IR::Block* value) noexcept : type{Type::Label}, label{value} {}
-
 Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {}
 
 Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {}
@@ -33,10 +31,6 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
 
 Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
 
-bool Value::IsLabel() const noexcept {
-    return type == Type::Label;
-}
-
 IR::Type Value::Type() const noexcept {
     if (IsPhi()) {
         // The type of a phi node is stored in its flags
@@ -60,8 +54,6 @@ bool Value::operator==(const Value& other) const {
         return true;
     case Type::Opaque:
         return inst == other.inst;
-    case Type::Label:
-        return label == other.label;
     case Type::Reg:
         return reg == other.reg;
     case Type::Pred:
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index beaf149f3f..2ce49f953d 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -37,7 +37,6 @@ class Value {
 public:
     Value() noexcept = default;
     explicit Value(IR::Inst* value) noexcept;
-    explicit Value(IR::Block* value) noexcept;
     explicit Value(IR::Reg value) noexcept;
     explicit Value(IR::Pred value) noexcept;
     explicit Value(IR::Attribute value) noexcept;
@@ -54,11 +53,9 @@ public:
     [[nodiscard]] bool IsPhi() const noexcept;
     [[nodiscard]] bool IsEmpty() const noexcept;
     [[nodiscard]] bool IsImmediate() const noexcept;
-    [[nodiscard]] bool IsLabel() const noexcept;
     [[nodiscard]] IR::Type Type() const noexcept;
 
     [[nodiscard]] IR::Inst* Inst() const;
-    [[nodiscard]] IR::Block* Label() const;
     [[nodiscard]] IR::Inst* InstRecursive() const;
     [[nodiscard]] IR::Value Resolve() const;
     [[nodiscard]] IR::Reg Reg() const;
@@ -80,7 +77,6 @@ private:
     IR::Type type{};
     union {
         IR::Inst* inst{};
-        IR::Block* label;
         IR::Reg reg;
         IR::Pred pred;
         IR::Attribute attribute;
@@ -304,11 +300,6 @@ inline IR::Inst* Value::Inst() const {
     return inst;
 }
 
-inline IR::Block* Value::Label() const {
-    DEBUG_ASSERT(type == Type::Label);
-    return label;
-}
-
 inline IR::Inst* Value::InstRecursive() const {
     DEBUG_ASSERT(type == Type::Opaque);
     if (IsIdentity()) {
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 0d3f006991..017c4b8fdc 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 #include <memory>
+#include <ranges>
 #include <vector>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
@@ -15,6 +16,16 @@
 
 namespace Shader::Maxwell {
 namespace {
+IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
+    auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) {
+                           return node.type == IR::AbstractSyntaxNode::Type::Block;
+                       })};
+    IR::BlockList blocks(std::ranges::distance(syntax_blocks));
+    std::ranges::transform(syntax_blocks, blocks.begin(),
+                           [](const IR::AbstractSyntaxNode& node) { return node.block; });
+    return blocks;
+}
+
 void RemoveUnreachableBlocks(IR::Program& program) {
     // Some blocks might be unreachable if a function call exists unconditionally
     // If this happens the number of blocks and post order blocks will mismatch
@@ -23,7 +34,7 @@ void RemoveUnreachableBlocks(IR::Program& program) {
     }
     const auto begin{program.blocks.begin() + 1};
     const auto end{program.blocks.end()};
-    const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }};
+    const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
     program.blocks.erase(std::remove_if(begin, end, pred), end);
 }
 
@@ -110,8 +121,9 @@ void AddNVNStorageBuffers(IR::Program& program) {
 IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
                              Environment& env, Flow::CFG& cfg) {
     IR::Program program;
-    program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
-    program.post_order_blocks = PostOrder(program.blocks);
+    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+    program.blocks = GenerateBlocks(program.syntax_list);
+    program.post_order_blocks = PostOrder(program.syntax_list.front());
     program.stage = env.ShaderStage();
     program.local_memory_size = env.LocalMemorySize();
     switch (program.stage) {
@@ -159,9 +171,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b
     Optimization::VertexATransformPass(vertex_a);
     Optimization::VertexBTransformPass(vertex_b);
     std::swap(result.blocks, vertex_a.blocks);
-    for (IR::Block* block : vertex_b.blocks) {
-        result.blocks.push_back(block);
-    }
+    result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end());
     result.stage = Stage::VertexB;
     result.info = vertex_a.info;
     result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
@@ -173,7 +183,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b
     Optimization::JoinTextureInfo(result.info, vertex_b.info);
     Optimization::JoinStorageInfo(result.info, vertex_b.info);
     Optimization::DualVertexJoinPass(result);
-    result.post_order_blocks = PostOrder(result.blocks);
+    result.post_order_blocks = PostOrder(result.syntax_list.front());
     Optimization::DeadCodeEliminationPass(result);
     Optimization::VerificationPass(result);
     Optimization::CollectShaderInfoPass(env_vertex_b, result);
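
GenerateBlocks above is a C++20 ranges pipeline: filter the syntax list down to Block nodes, pre-size the output with std::ranges::distance (filter views are not sized, so this is one linear pass), and transform each node into its IR::Block pointer. For readers not yet used to <ranges>, a hand-rolled equivalent of the function body would be:

    IR::BlockList blocks;
    for (const IR::AbstractSyntaxNode& node : syntax_list) {
        if (node.type == IR::AbstractSyntaxNode::Type::Block) {
            blocks.push_back(node.block);
        }
    }
    return blocks;
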
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index cc5410c6df..e7e2e9c826 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -36,7 +36,6 @@ using Tree = boost::intrusive::list<Statement,
                                     // Avoid linear complexity on splice, size is never called
                                     boost::intrusive::constant_time_size<false>>;
 using Node = Tree::iterator;
-using ConstNode = Tree::const_iterator;
 
 enum class StatementType {
     Code,
@@ -91,7 +90,8 @@ struct IndirectBranchCond {};
 #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
 #endif
 struct Statement : ListBaseHook {
-    Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {}
+    Statement(const Flow::Block* block_, Statement* up_)
+        : block{block_}, up{up_}, type{StatementType::Code} {}
     Statement(Goto, Statement* cond_, Node label_, Statement* up_)
         : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
     Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
@@ -125,7 +125,7 @@ struct Statement : ListBaseHook {
     }
 
     union {
-        IR::Block* code;
+        const Flow::Block* block;
         Node label;
         Tree children;
         IR::Condition guest_cond;
@@ -171,8 +171,8 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) {
         switch (stmt->type) {
         case StatementType::Code:
             ret += fmt::format("{}    Block {:04x} -> {:04x} (0x{:016x});\n", indent,
-                               stmt->code->LocationBegin(), stmt->code->LocationEnd(),
-                               reinterpret_cast<uintptr_t>(stmt->code));
+                               stmt->block->begin, stmt->block->end,
+                               reinterpret_cast<uintptr_t>(stmt->block));
             break;
         case StatementType::Goto:
             ret += fmt::format("{}    if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
@@ -407,11 +407,7 @@ private:
             }};
             root.push_front(make_reset_variable());
             root.insert(ip, make_reset_variable());
-
-            const u32 begin_offset{block.begin.Offset()};
-            const u32 end_offset{block.end.Offset()};
-            IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)};
-            root.insert(ip, *pool.Create(ir_block, &root_stmt));
+            root.insert(ip, *pool.Create(&block, &root_stmt));
 
             switch (block.end_class) {
             case Flow::EndClass::Branch: {
@@ -620,13 +616,13 @@ private:
     Statement root_stmt{FunctionTag{}};
 };
 
-IR::Block* TryFindForwardBlock(const Statement& stmt) {
-    const Tree& tree{stmt.up->children};
-    const ConstNode end{tree.cend()};
-    ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))};
+[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
+    Tree& tree{stmt.up->children};
+    const Node end{tree.end()};
+    Node forward_node{std::next(Tree::s_iterator_to(stmt))};
     while (forward_node != end && !HasChildren(forward_node->type)) {
         if (forward_node->type == StatementType::Code) {
-            return forward_node->code;
+            return &*forward_node;
         }
         ++forward_node;
     }
@@ -654,21 +650,29 @@ class TranslatePass {
 public:
     TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
                   ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
-                  IR::BlockList& block_list_)
+                  IR::AbstractSyntaxList& syntax_list_)
         : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
-          block_list{block_list_} {
+          syntax_list{syntax_list_} {
         Visit(root_stmt, nullptr, nullptr);
 
-        IR::Block& first_block{*block_list.front()};
+        IR::Block& first_block{*syntax_list.front().block};
         IR::IREmitter ir{first_block, first_block.begin()};
         ir.Prologue();
     }
 
 private:
-    void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) {
+    void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
+        IR::Block* current_block{};
+        const auto ensure_block{[&] {
+            if (current_block) {
+                return;
+            }
+            current_block = block_pool.Create(inst_pool);
+            auto& node{syntax_list.emplace_back()};
+            node.type = IR::AbstractSyntaxNode::Type::Block;
+            node.block = current_block;
+        }};
         Tree& tree{parent.children};
-        IR::Block* current_block{nullptr};
-
         for (auto it = tree.begin(); it != tree.end(); ++it) {
             Statement& stmt{*it};
             switch (stmt.type) {
@@ -676,124 +680,157 @@ private:
                 // Labels can be ignored
                 break;
             case StatementType::Code: {
-                if (current_block && current_block != stmt.code) {
-                    IR::IREmitter{*current_block}.Branch(stmt.code);
-                }
-                current_block = stmt.code;
-                Translate(env, stmt.code);
-                block_list.push_back(stmt.code);
+                ensure_block();
+                Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
                 break;
             }
             case StatementType::SetVariable: {
-                if (!current_block) {
-                    current_block = MergeBlock(parent, stmt);
-                }
+                ensure_block();
                 IR::IREmitter ir{*current_block};
                 ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
                 break;
             }
             case StatementType::SetIndirectBranchVariable: {
-                if (!current_block) {
-                    current_block = MergeBlock(parent, stmt);
-                }
+                ensure_block();
                 IR::IREmitter ir{*current_block};
                 IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
                 ir.SetIndirectBranchVariable(address);
                 break;
             }
             case StatementType::If: {
-                if (!current_block) {
-                    current_block = block_pool.Create(inst_pool);
-                    block_list.push_back(current_block);
-                }
+                ensure_block();
                 IR::Block* const merge_block{MergeBlock(parent, stmt)};
 
-                // Visit children
-                const size_t first_block_index{block_list.size()};
-                Visit(stmt, merge_block, break_block);
-
                 // Implement if header block
-                IR::Block* const first_if_block{block_list.at(first_block_index)};
                 IR::IREmitter ir{*current_block};
                 const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
-                ir.SelectionMerge(merge_block);
-                ir.BranchConditional(cond, first_if_block, merge_block);
+                ir.BranchConditionRef(cond);
 
+                const size_t if_node_index{syntax_list.size()};
+                syntax_list.emplace_back();
+
+                // Visit children
+                const size_t then_block_index{syntax_list.size()};
+                Visit(stmt, break_block, merge_block);
+
+                IR::Block* const then_block{syntax_list.at(then_block_index).block};
+                current_block->AddBranch(then_block);
+                current_block->AddBranch(merge_block);
                 current_block = merge_block;
+
+                auto& if_node{syntax_list[if_node_index]};
+                if_node.type = IR::AbstractSyntaxNode::Type::If;
+                if_node.if_node.cond = cond;
+                if_node.if_node.body = then_block;
+                if_node.if_node.merge = merge_block;
+
+                auto& endif_node{syntax_list.emplace_back()};
+                endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
+                endif_node.end_if.merge = merge_block;
+
+                auto& merge{syntax_list.emplace_back()};
+                merge.type = IR::AbstractSyntaxNode::Type::Block;
+                merge.block = merge_block;
                 break;
             }
             case StatementType::Loop: {
                 IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
                 if (current_block) {
-                    IR::IREmitter{*current_block}.Branch(loop_header_block);
+                    current_block->AddBranch(loop_header_block);
                 }
-                block_list.push_back(loop_header_block);
+                auto& header_node{syntax_list.emplace_back()};
+                header_node.type = IR::AbstractSyntaxNode::Type::Block;
+                header_node.block = loop_header_block;
 
-                IR::Block* const new_continue_block{block_pool.Create(inst_pool)};
+                IR::Block* const continue_block{block_pool.Create(inst_pool)};
                 IR::Block* const merge_block{MergeBlock(parent, stmt)};
 
+                const size_t loop_node_index{syntax_list.size()};
+                syntax_list.emplace_back();
+
                 // Visit children
-                const size_t first_block_index{block_list.size()};
-                Visit(stmt, new_continue_block, merge_block);
+                const size_t body_block_index{syntax_list.size()};
+                Visit(stmt, merge_block, continue_block);
 
                 // The continue block is located at the end of the loop
-                block_list.push_back(new_continue_block);
+                IR::IREmitter ir{*continue_block};
+                const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
+                ir.BranchConditionRef(cond);
 
-                // Implement loop header block
-                IR::Block* const first_loop_block{block_list.at(first_block_index)};
-                IR::IREmitter ir{*loop_header_block};
-                ir.LoopMerge(merge_block, new_continue_block);
-                ir.Branch(first_loop_block);
+                IR::Block* const body_block{syntax_list.at(body_block_index).block};
+                loop_header_block->AddBranch(body_block);
 
-                // Implement continue block
-                IR::IREmitter continue_ir{*new_continue_block};
-                const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)};
-                continue_ir.BranchConditional(continue_cond, ir.block, merge_block);
+                continue_block->AddBranch(loop_header_block);
+                continue_block->AddBranch(merge_block);
 
                 current_block = merge_block;
+
+                auto& loop{syntax_list[loop_node_index]};
+                loop.type = IR::AbstractSyntaxNode::Type::Loop;
+                loop.loop.body = body_block;
+                loop.loop.continue_block = continue_block;
+                loop.loop.merge = merge_block;
+
+                auto& continue_block_node{syntax_list.emplace_back()};
+                continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
+                continue_block_node.block = continue_block;
+
+                auto& repeat{syntax_list.emplace_back()};
+                repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
+                repeat.repeat.cond = cond;
+                repeat.repeat.loop_header = loop_header_block;
+                repeat.repeat.merge = merge_block;
+
+                auto& merge{syntax_list.emplace_back()};
+                merge.type = IR::AbstractSyntaxNode::Type::Block;
+                merge.block = merge_block;
                 break;
             }
             case StatementType::Break: {
-                if (!current_block) {
-                    current_block = block_pool.Create(inst_pool);
-                    block_list.push_back(current_block);
-                }
+                ensure_block();
                 IR::Block* const skip_block{MergeBlock(parent, stmt)};
 
                 IR::IREmitter ir{*current_block};
-                ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block);
-
+                const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
+                ir.BranchConditionRef(cond);
+                current_block->AddBranch(break_block);
+                current_block->AddBranch(skip_block);
                 current_block = skip_block;
+
+                auto& break_node{syntax_list.emplace_back()};
+                break_node.type = IR::AbstractSyntaxNode::Type::Break;
+                break_node.break_node.cond = cond;
+                break_node.break_node.merge = break_block;
+                break_node.break_node.skip = skip_block;
+
+                auto& merge{syntax_list.emplace_back()};
+                merge.type = IR::AbstractSyntaxNode::Type::Block;
+                merge.block = skip_block;
                 break;
             }
             case StatementType::Return: {
-                if (!current_block) {
-                    current_block = block_pool.Create(inst_pool);
-                    block_list.push_back(current_block);
-                }
-                IR::IREmitter ir{*current_block};
-                ir.Epilogue();
-                ir.Return();
+                ensure_block();
+                IR::IREmitter{*current_block}.Epilogue();
                 current_block = nullptr;
+                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
                 break;
             }
             case StatementType::Kill: {
-                if (!current_block) {
-                    current_block = block_pool.Create(inst_pool);
-                    block_list.push_back(current_block);
-                }
+                ensure_block();
                 IR::Block* demote_block{MergeBlock(parent, stmt)};
-                IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block);
+                IR::IREmitter{*current_block}.DemoteToHelperInvocation();
+                current_block->AddBranch(demote_block);
                 current_block = demote_block;
+
+                auto& merge{syntax_list.emplace_back()};
+                merge.type = IR::AbstractSyntaxNode::Type::Block;
+                merge.block = demote_block;
                 break;
             }
             case StatementType::Unreachable: {
-                if (!current_block) {
-                    current_block = block_pool.Create(inst_pool);
-                    block_list.push_back(current_block);
-                }
-                IR::IREmitter{*current_block}.Unreachable();
+                ensure_block();
                 current_block = nullptr;
+                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
                 break;
             }
             default:
@@ -801,42 +838,42 @@ private:
             }
         }
         if (current_block) {
-            IR::IREmitter ir{*current_block};
-            if (continue_block) {
-                ir.Branch(continue_block);
+            if (fallthrough_block) {
+                current_block->AddBranch(fallthrough_block);
             } else {
-                ir.Unreachable();
+                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
             }
         }
     }
 
     IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
-        if (IR::Block* const block{TryFindForwardBlock(stmt)}) {
-            return block;
+        Statement* merge_stmt{TryFindForwardBlock(stmt)};
+        if (!merge_stmt) {
+            // Create a merge statement (wrapping a dummy flow block) to visit later
+            merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
+            parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
         }
-        // Create a merge block we can visit later
-        IR::Block* const block{block_pool.Create(inst_pool)};
-        Statement* const merge_stmt{stmt_pool.Create(block, &parent)};
-        parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
-        return block;
+        return block_pool.Create(inst_pool);
     }
 
     ObjectPool<Statement>& stmt_pool;
     ObjectPool<IR::Inst>& inst_pool;
     ObjectPool<IR::Block>& block_pool;
     Environment& env;
-    IR::BlockList& block_list;
+    IR::AbstractSyntaxList& syntax_list;
+    // TODO: Make this constexpr when std::vector is constexpr
+    const Flow::Block dummy_flow_block;
 };
 } // Anonymous namespace
 
-IR::BlockList VisitAST(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                       Environment& env, Flow::CFG& cfg) {
+IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+                                Environment& env, Flow::CFG& cfg) {
     ObjectPool<Statement> stmt_pool{64};
     GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool};
     Statement& root{goto_pass.RootStatement()};
-    IR::BlockList block_list;
-    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list};
-    return block_list;
+    IR::AbstractSyntaxList syntax_list;
+    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
+    return syntax_list;
 }
 
 } // namespace Shader::Maxwell
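
The easiest way to see what TranslatePass::Visit now produces is the node sequence it appends for each construct. Sketched from the If and Loop cases above, `if (cond) { body }` and a loop become:

    Block(header)            // ends with BranchConditionRef(cond)
    If{cond, body, merge}
    Block(body) ...          // children visited recursively
    EndIf{merge}
    Block(merge)

    Block(header)
    Loop{body, continue_block, merge}
    Block(body) ...
    Block(continue_block)    // ends with BranchConditionRef(cond)
    Repeat{cond, loop_header, merge}
    Block(merge)

A backend can therefore walk the list linearly and emit structured constructs directly, instead of recovering them from branch targets.
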
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
index a6be12ba2e..88b0836491 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -4,12 +4,8 @@
 
 #pragma once
 
-#include <functional>
-#include <span>
-
-#include <boost/intrusive/list.hpp>
-
 #include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
@@ -17,8 +13,8 @@
 
 namespace Shader::Maxwell {
 
-[[nodiscard]] IR::BlockList VisitAST(ObjectPool<IR::Inst>& inst_pool,
-                                     ObjectPool<IR::Block>& block_pool, Environment& env,
-                                     Flow::CFG& cfg);
+[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
+                                              ObjectPool<IR::Block>& block_pool, Environment& env,
+                                              Flow::CFG& cfg);
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
index f1230f58fe..0f4e7a251a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -23,13 +23,12 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
     }
 }
 
-void Translate(Environment& env, IR::Block* block) {
-    if (block->IsVirtual()) {
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
+    if (location_begin == location_end) {
         return;
     }
     TranslatorVisitor visitor{env, *block};
-    const Location pc_end{block->LocationEnd()};
-    for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) {
+    for (Location pc = location_begin; pc != location_end; ++pc) {
         const u64 insn{env.ReadInstruction(pc.Offset())};
         const Opcode opcode{Decode(insn)};
         switch (opcode) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
index e1aa2e0f4b..a3edd2e466 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/translate.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -9,6 +9,6 @@
 
 namespace Shader::Maxwell {
 
-void Translate(Environment& env, IR::Block* block);
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index b1c45d13a8..66f1391dbf 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -353,24 +353,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<
     return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
 }
 
-void FoldBranchConditional(IR::Inst& inst) {
-    const IR::U1 cond{inst.Arg(0)};
-    if (cond.IsImmediate()) {
-        // TODO: Convert to Branch
-        return;
-    }
-    const IR::Inst* cond_inst{cond.InstRecursive()};
-    if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) {
-        const IR::Value true_label{inst.Arg(1)};
-        const IR::Value false_label{inst.Arg(2)};
-        // Remove negation on the conditional (take the parameter out of LogicalNot) and swap
-        // the branches
-        inst.SetArg(0, cond_inst->Arg(0));
-        inst.SetArg(1, false_label);
-        inst.SetArg(2, true_label);
-    }
-}
-
 std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
                                                   IR::Opcode construct, u32 first_index) {
     IR::Inst* const inst{inst_value.InstRecursive()};
@@ -581,8 +563,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
             return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
         });
         return;
-    case IR::Opcode::BranchConditional:
-        return FoldBranchConditional(inst);
     case IR::Opcode::CompositeExtractF32x2:
         return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
                                     IR::Opcode::CompositeInsertF32x2);
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
index f2d7db0e6b..b0a9f5258e 100644
--- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
+++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
@@ -13,60 +13,16 @@
 
 namespace Shader::Optimization {
 
-void VertexATransformPass(IR::Program& program) {
-    bool replaced_join{};
-    bool eliminated_epilogue{};
-    for (IR::Block* const block : program.post_order_blocks) {
-        for (IR::Inst& inst : block->Instructions()) {
-            switch (inst.GetOpcode()) {
-            case IR::Opcode::Return:
-                inst.ReplaceOpcode(IR::Opcode::Join);
-                replaced_join = true;
-                break;
-            case IR::Opcode::Epilogue:
-                inst.Invalidate();
-                eliminated_epilogue = true;
-                break;
-            default:
-                break;
-            }
-            if (replaced_join && eliminated_epilogue) {
-                return;
-            }
-        }
-    }
+void VertexATransformPass(IR::Program&) {
+    throw NotImplementedException("VertexA pass");
 }
 
-void VertexBTransformPass(IR::Program& program) {
-    for (IR::Block* const block : program.blocks) {
-        for (IR::Inst& inst : block->Instructions()) {
-            if (inst.GetOpcode() == IR::Opcode::Prologue) {
-                return inst.Invalidate();
-            }
-        }
-    }
+void VertexBTransformPass(IR::Program&) {
+    throw NotImplementedException("VertexB pass");
 }
 
-void DualVertexJoinPass(IR::Program& program) {
-    const auto& blocks = program.blocks;
-    const s64 sub_size = static_cast<s64>(blocks.size()) - 1;
-    if (sub_size < 1) {
-        throw LogicError("Dual Vertex Join pass failed, expected atleast 2 blocks");
-    }
-    for (s64 index = 0; index < sub_size; ++index) {
-        IR::Block* const current_block{blocks[index]};
-        IR::Block* const next_block{blocks[index + 1]};
-        for (IR::Inst& inst : current_block->Instructions()) {
-            if (inst.GetOpcode() == IR::Opcode::Join) {
-                IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)};
-                ir.Branch(next_block);
-                inst.Invalidate();
-                // Only 1 join should exist
-                return;
-            }
-        }
-    }
-    throw LogicError("Dual Vertex Join pass failed, no join present");
+void DualVertexJoinPass(IR::Program&) {
+    throw NotImplementedException("Dual vertex join pass");
 }
 
 } // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 6afbe24f7f..e9b55f8358 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -12,7 +12,6 @@ namespace Shader::Optimization {
 
 void IdentityRemovalPass(IR::Program& program) {
     std::vector<IR::Inst*> to_invalidate;
-
     for (IR::Block* const block : program.blocks) {
         for (auto inst = block->begin(); inst != block->end();) {
             const size_t num_args{inst->NumArgs()};
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index a8064a5d00..26eb3a3abc 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -202,7 +202,7 @@ public:
 
                     incomplete_phis[block].insert_or_assign(variable, phi);
                     stack.back().result = IR::Value{&*phi};
-                } else if (const std::span imm_preds{block->ImmediatePredecessors()};
+                } else if (const std::span imm_preds = block->ImmPredecessors();
                            imm_preds.size() == 1) {
                     // Optimize the common case of one predecessor: no phi needed
                     stack.back().pc = Status::SetValue;
@@ -257,7 +257,7 @@ public:
 private:
     template <typename Type>
     IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
-        for (IR::Block* const imm_pred : block->ImmediatePredecessors()) {
+        for (IR::Block* const imm_pred : block->ImmPredecessors()) {
             phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
         }
         return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
-- 
cgit v1.2.3-70-g09d2


From bf5e48ffe4bd48ea681f2a01c8919c97125e88df Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 14 May 2021 04:48:46 -0300
Subject: glasm: Initial implementation of phi nodes on GLASM

---
 src/shader_recompiler/backend/glasm/emit_glasm.cpp | 59 ++++++++++++++++++++--
 .../backend/glasm/emit_glasm_instructions.h        |  3 +-
 .../backend/glasm/emit_glasm_not_implemented.cpp   | 27 ++++++++--
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  6 ++-
 .../backend/spirv/emit_spirv_instructions.h        |  3 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 18 +++++--
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  4 +-
 .../frontend/ir/microinstruction.cpp               |  3 +-
 src/shader_recompiler/frontend/ir/opcodes.inc      |  3 +-
 src/shader_recompiler/frontend/ir/value.h          |  4 ++
 .../frontend/maxwell/structured_control_flow.cpp   |  6 +--
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |  6 +--
 12 files changed, 117 insertions(+), 25 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index fa48ba25c8..775dd9e7e6 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <ranges>
 #include <string>
 #include <tuple>
 
@@ -9,6 +10,7 @@
 #include "shader_recompiler/backend/glasm/emit_context.h"
 #include "shader_recompiler/backend/glasm/emit_glasm.h"
 #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/profile.h"
 
@@ -175,6 +177,34 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) {
     throw LogicError("Invalid opcode {}", inst->GetOpcode());
 }
 
+void Precolor(EmitContext& ctx, const IR::Program& program) {
+    for (IR::Block* const block : program.blocks) {
+        for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) {
+            switch (phi.Arg(0).Type()) {
+            case IR::Type::U1:
+            case IR::Type::U32:
+            case IR::Type::F32:
+                ctx.reg_alloc.Define(phi);
+                break;
+            case IR::Type::U64:
+            case IR::Type::F64:
+                ctx.reg_alloc.LongDefine(phi);
+                break;
+            default:
+                throw NotImplementedException("Phi node type {}", phi.Type());
+            }
+            const size_t num_args{phi.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                IR::IREmitter{*phi.PhiBlock(i)}.PhiMove(phi, phi.Arg(i));
+            }
+            // Add reference to the phi node on the phi predecessor to avoid overwriting it
+            for (size_t i = 0; i < num_args; ++i) {
+                IR::IREmitter{*phi.PhiBlock(i)}.DummyReference(IR::Value{&phi});
+            }
+        }
+    }
+}
+
 void EmitCode(EmitContext& ctx, const IR::Program& program) {
     const auto eval{
         [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
@@ -186,7 +216,9 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) {
             }
             break;
         case IR::AbstractSyntaxNode::Type::If:
-            ctx.Add("MOV.S.CC RC,{};IF NE.x;", eval(node.if_node.cond));
+            ctx.Add("MOV.S.CC RC,{};"
+                    "IF NE.x;",
+                    eval(node.if_node.cond));
             break;
         case IR::AbstractSyntaxNode::Type::EndIf:
             ctx.Add("ENDIF;");
@@ -195,10 +227,30 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) {
             ctx.Add("REP;");
             break;
         case IR::AbstractSyntaxNode::Type::Repeat:
-            ctx.Add("MOV.S.CC RC,{};BRK NE.x;ENDREP;", eval(node.repeat.cond));
+            if (node.repeat.cond.IsImmediate()) {
+                if (node.repeat.cond.U1()) {
+                    ctx.Add("ENDREP;");
+                } else {
+                    ctx.Add("BRK;"
+                            "ENDREP;");
+                }
+            } else {
+                ctx.Add("MOV.S.CC RC,{};"
+                        "BRK (EQ.x);"
+                        "ENDREP;",
+                        eval(node.repeat.cond));
+            }
             break;
         case IR::AbstractSyntaxNode::Type::Break:
-            ctx.Add("MOV.S.CC RC,{};BRK NE.x;", eval(node.repeat.cond));
+            if (node.break_node.cond.IsImmediate()) {
+                if (node.break_node.cond.U1()) {
+                    ctx.Add("BRK;");
+                }
+            } else {
+                ctx.Add("MOV.S.CC RC,{};"
+                        "BRK (NE.x);",
+                        eval(node.break_node.cond));
+            }
             break;
         case IR::AbstractSyntaxNode::Type::Return:
         case IR::AbstractSyntaxNode::Type::Unreachable:
@@ -233,6 +285,7 @@ void SetupOptions(std::string& header, Info info) {
 
 std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) {
     EmitContext ctx{program};
+    Precolor(ctx, program);
     EmitCode(ctx, program);
     std::string header = "!!NVcp5.0\n"
                          "OPTION NV_internal;";
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 0f7f16e6e2..a74e422d6f 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -22,7 +22,8 @@ class EmitContext;
 void EmitPhi(EmitContext& ctx, IR::Inst& inst);
 void EmitVoid(EmitContext& ctx);
 void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
-void EmitBranchConditionRef(EmitContext&);
+void EmitDummyReference(EmitContext&);
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
 void EmitJoin(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx);
 void EmitBarrier(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
index f37ad55879..969b91a810 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -17,13 +17,32 @@ namespace Shader::Backend::GLASM {
 
 #define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
 
-void EmitPhi(EmitContext& ctx, IR::Inst& inst) {
-    NotImplemented();
-}
+void EmitPhi(EmitContext&, IR::Inst&) {}
 
 void EmitVoid(EmitContext&) {}
 
-void EmitBranchConditionRef(EmitContext&) {}
+void EmitDummyReference(EmitContext&) {}
+
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) {
+    if (phi == value) {
+        return;
+    }
+    const Register phi_reg{ctx.reg_alloc.Consume(phi)};
+    const Value eval_value{ctx.reg_alloc.Consume(value)};
+    switch (phi.InstRecursive()->Arg(0).Type()) {
+    case IR::Type::U1:
+    case IR::Type::U32:
+    case IR::Type::F32:
+        ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value});
+        break;
+    case IR::Type::U64:
+    case IR::Type::F64:
+        ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value});
+        break;
+    default:
+        throw NotImplementedException("Phi node type {}", phi.Type());
+    }
+}
 
 void EmitJoin(EmitContext& ctx) {
     NotImplemented();
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index c22edfec2a..7bf8c78dea 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -468,7 +468,11 @@ Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
     return id;
 }
 
-void EmitBranchConditionRef(EmitContext&) {}
+void EmitDummyReference(EmitContext&) {}
+
+void EmitPhiMove(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
 
 void EmitGetZeroFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 2f4f6e59ed..0a2b317729 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -23,7 +23,8 @@ class EmitContext;
 Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
 void EmitVoid(EmitContext& ctx);
 Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
-void EmitBranchConditionRef(EmitContext&);
+void EmitDummyReference(EmitContext&);
+void EmitPhiMove(EmitContext&);
 void EmitJoin(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx);
 void EmitBarrier(EmitContext& ctx);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index eb45aa4772..def29143ec 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -61,6 +61,14 @@ F64 IREmitter::Imm64(f64 value) const {
     return F64{Value{value}};
 }
 
+void IREmitter::DummyReference(const Value& value) {
+    Inst(Opcode::DummyReference, value);
+}
+
+void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
+    Inst(Opcode::PhiMove, Value{&phi}, value);
+}
+
 void IREmitter::Prologue() {
     Inst(Opcode::Prologue);
 }
@@ -69,10 +77,6 @@ void IREmitter::Epilogue() {
     Inst(Opcode::Epilogue);
 }
 
-void IREmitter::BranchConditionRef(const U1& cond) {
-    Inst(Opcode::BranchConditionRef, cond);
-}
-
 void IREmitter::DemoteToHelperInvocation() {
     Inst(Opcode::DemoteToHelperInvocation);
 }
@@ -106,6 +110,9 @@ void IREmitter::SetReg(IR::Reg reg, const U32& value) {
 }
 
 U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) {
+    if (pred == Pred::PT) {
+        return Imm1(!is_negated);
+    }
     const U1 value{Inst<U1>(Opcode::GetPred, pred)};
     if (is_negated) {
         return Inst<U1>(Opcode::LogicalNot, value);
@@ -264,6 +271,9 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
 U1 IREmitter::Condition(IR::Condition cond) {
     const FlowTest flow_test{cond.GetFlowTest()};
     const auto [pred, is_negated]{cond.GetPred()};
+    if (flow_test == FlowTest::T) {
+        return GetPred(pred, is_negated);
+    }
     return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
 }
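
Both early returns added here are small constant folds: PT is the always-true predicate, so GetPred(PT, is_negated) collapses to an immediate without emitting a GetPred instruction, and a flow test of T (always pass) contributes nothing to the conjunction, so Condition can skip the LogicalAnd entirely. In illustrative pseudo-IR, Condition on (PT, FlowTest::T) previously produced

    %pred = GetPred PT
    %cond = LogicalAnd %pred, %flow_test

and now folds straight to

    %cond = Imm1(true)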
 
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 7a83c33d33..4f7c820feb 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -32,9 +32,11 @@ public:
     [[nodiscard]] U64 Imm64(s64 value) const;
     [[nodiscard]] F64 Imm64(f64 value) const;
 
+    void DummyReference(const Value& value);
+    void PhiMove(IR::Inst& phi, const Value& value);
+
     void Prologue();
     void Epilogue();
-    void BranchConditionRef(const U1& cond);
     void DemoteToHelperInvocation();
     void EmitVertex(const U32& stream);
     void EndPrimitive(const U32& stream);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 3645742401..267aebc612 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -56,9 +56,10 @@ Inst::~Inst() {
 
 bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
+    case Opcode::DummyReference:
+    case Opcode::PhiMove:
     case Opcode::Prologue:
     case Opcode::Epilogue:
-    case Opcode::BranchConditionRef:
     case Opcode::Join:
     case Opcode::DemoteToHelperInvocation:
     case Opcode::Barrier:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 75ddb6b6f8..6196b867dd 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -6,11 +6,12 @@
 OPCODE(Phi,                                                 Opaque,                                                                                         )
 OPCODE(Identity,                                            Opaque,         Opaque,                                                                         )
 OPCODE(Void,                                                Void,                                                                                           )
+OPCODE(DummyReference,                                      Void,           Opaque,                                                                         )
+OPCODE(PhiMove,                                             Void,           Opaque,         Opaque,                                                         )
 
 // Special operations
 OPCODE(Prologue,                                            Void,                                                                                           )
 OPCODE(Epilogue,                                            Void,                                                                                           )
-OPCODE(BranchConditionRef,                                  Void,           U1,                                                                             )
 OPCODE(Join,                                                Void,                                                                                           )
 OPCODE(DemoteToHelperInvocation,                            Void,                                                                                           )
 OPCODE(EmitVertex,                                          Void,           U32,                                                                            )
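
For reference, opcodes.inc is an X-macro table of the form OPCODE(name, result type, argument types...), so the two entries added above declare DummyReference as a void instruction with one opaque argument and PhiMove as a void instruction with two. Consumers define OPCODE before including the file; a typical consumer looks roughly like this (an illustrative sketch, not code from this patch):

    enum class Opcode {
    #define OPCODE(name, result_type, ...) name,
    #include "shader_recompiler/frontend/ir/opcodes.inc"
    #undef OPCODE
    };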
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 2ce49f953d..0c6bf684d0 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -391,4 +391,8 @@ inline f64 Value::F64() const {
     return imm_f64;
 }
 
+[[nodiscard]] inline bool IsPhi(const Inst& inst) {
+    return inst.GetOpcode() == Opcode::Phi;
+}
+
 } // namespace Shader::IR
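
Hoisting IsPhi from ssa_rewrite_pass.cpp into the IR namespace lets any pass use it directly with the ranges algorithms, as the rewrite pass below now does; for example (illustrative, assuming a Block instruction range):

    const auto first_non_phi{std::ranges::find_if_not(block.Instructions(), IR::IsPhi)};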
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index e7e2e9c826..836d4b8aac 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -704,7 +704,7 @@ private:
                 // Implement if header block
                 IR::IREmitter ir{*current_block};
                 const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
-                ir.BranchConditionRef(cond);
+                ir.DummyReference(cond);
 
                 const size_t if_node_index{syntax_list.size()};
                 syntax_list.emplace_back();
@@ -755,7 +755,7 @@ private:
                 // The continue block is located at the end of the loop
                 IR::IREmitter ir{*continue_block};
                 const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
-                ir.BranchConditionRef(cond);
+                ir.DummyReference(cond);
 
                 IR::Block* const body_block{syntax_list.at(body_block_index).block};
                 loop_header_block->AddBranch(body_block);
@@ -792,7 +792,7 @@ private:
 
                 IR::IREmitter ir{*current_block};
                 const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
-                ir.BranchConditionRef(cond);
+                ir.DummyReference(cond);
                 current_block->AddBranch(break_block);
                 current_block->AddBranch(skip_block);
                 current_block = skip_block;
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index 26eb3a3abc..e54499ba50 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -138,10 +138,6 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
     return IR::Opcode::UndefU32;
 }
 
-[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
-    return inst.GetOpcode() == IR::Opcode::Phi;
-}
-
 enum class Status {
     Start,
     SetValue,
@@ -283,7 +279,7 @@ private:
         list.erase(IR::Block::InstructionList::s_iterator_to(phi));
 
         // Find the first non-phi instruction and use it as an insertion point
-        IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IsPhi)};
+        IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
         if (same.IsEmpty()) {
             // The phi is unreachable or in the start block
             // Insert an undefined instruction and make it the phi node replacement
-- 
cgit v1.2.3-70-g09d2


From 9bb3e008c9f4bbdd35c095b506c3a3312d17e383 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 18 May 2021 02:04:22 -0300
Subject: shader: Read branch conditions from an instruction

Fixes the identity removal pass.
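
The likely failure mode: DummyReference produced no value, so the structured control-flow code had to keep the condition as a bare IR::U1 alongside it, and once the identity removal pass folded an Identity instruction away, that stored value could point at a dead instruction. ConditionRef is itself a U1 -> U1 instruction, so the branch condition is now read back out of a real instruction whose operand the pass rewrites like any other use; the GLASM backend treats it as an alias of its operand and the SPIR-V backend forwards the operand's definition. In illustrative pseudo-IR:

    ; before: the syntax node stored %cond directly
    %cond = Identity %x
    DummyReference %cond

    ; after: the syntax node stores %ref, which survives identity removal
    %cond = Identity %x
    %ref  = ConditionRef %cond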
---
 src/shader_recompiler/backend/glasm/emit_glasm.cpp             |  2 +-
 .../backend/glasm/emit_glasm_bitwise_conversion.cpp            |  4 ++++
 src/shader_recompiler/backend/glasm/emit_glasm_instructions.h  |  3 ++-
 .../backend/glasm/emit_glasm_not_implemented.cpp               |  2 +-
 src/shader_recompiler/backend/glasm/reg_alloc.cpp              |  1 +
 src/shader_recompiler/backend/spirv/emit_spirv.cpp             | 10 +++++++++-
 src/shader_recompiler/backend/spirv/emit_spirv_instructions.h  |  3 ++-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp               |  8 ++++++--
 src/shader_recompiler/frontend/ir/ir_emitter.h                 |  4 +++-
 src/shader_recompiler/frontend/ir/microinstruction.cpp         |  3 ++-
 src/shader_recompiler/frontend/ir/opcodes.inc                  |  3 ++-
 .../frontend/maxwell/structured_control_flow.cpp               |  9 +++------
 12 files changed, 36 insertions(+), 16 deletions(-)

(limited to 'src/shader_recompiler/frontend/ir/microinstruction.cpp')

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index d7a08e4b36..a893fa3fbe 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -200,7 +200,7 @@ void Precolor(EmitContext& ctx, const IR::Program& program) {
             }
             // Add a reference to the phi node on each phi predecessor to avoid overwriting it
             for (size_t i = 0; i < num_args; ++i) {
-                IR::IREmitter{*phi.PhiBlock(i)}.DummyReference(IR::Value{&phi});
+                IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
             }
         }
     }
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
index cdbf6e93ec..505378bfd6 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -22,6 +22,10 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
     Alias(inst, value);
 }
 
+void EmitConditionRef(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+    Alias(inst, value);
+}
+
 void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
     Alias(inst, value);
 }
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 54e7fab3cb..df0933a3f2 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -22,7 +22,8 @@ class EmitContext;
 void EmitPhi(EmitContext& ctx, IR::Inst& inst);
 void EmitVoid(EmitContext& ctx);
 void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
-void EmitDummyReference(EmitContext&);
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitReference(EmitContext&);
 void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
 void EmitJoin(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
index a4c1ca4819..015cb55764 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -21,7 +21,7 @@ void EmitPhi(EmitContext&, IR::Inst&) {}
 
 void EmitVoid(EmitContext&) {}
 
-void EmitDummyReference(EmitContext&) {}
+void EmitReference(EmitContext&) {}
 
 void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) {
     if (phi == value) {
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
index 707b22247a..1a88331b41 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -139,6 +139,7 @@ void RegAlloc::Free(Id id) {
 /*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::Identity:
+    case IR::Opcode::ConditionRef:
     case IR::Opcode::BitCastU16F16:
     case IR::Opcode::BitCastU32F32:
     case IR::Opcode::BitCastU64F64:
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 9ed2af9913..3e20ac3b90 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -469,7 +469,15 @@ Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
     return id;
 }
 
-void EmitDummyReference(EmitContext&) {}
+Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
+    const Id id{ctx.Def(value)};
+    if (!Sirit::ValidId(id)) {
+        throw NotImplementedException("Forward identity declaration");
+    }
+    return id;
+}
+
+void EmitReference(EmitContext&) {}
 
 void EmitPhiMove(EmitContext&) {
     throw LogicError("Unreachable instruction");
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 22260d2a98..1181e7b4ff 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -23,7 +23,8 @@ class EmitContext;
 Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
 void EmitVoid(EmitContext& ctx);
 Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
-void EmitDummyReference(EmitContext&);
+Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
+void EmitReference(EmitContext&);
 void EmitPhiMove(EmitContext&);
 void EmitJoin(EmitContext& ctx);
 void EmitDemoteToHelperInvocation(EmitContext& ctx);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 94bdbe39c1..e9fd412373 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -61,8 +61,12 @@ F64 IREmitter::Imm64(f64 value) const {
     return F64{Value{value}};
 }
 
-void IREmitter::DummyReference(const Value& value) {
-    Inst(Opcode::DummyReference, value);
+U1 IREmitter::ConditionRef(const U1& value) {
+    return Inst<U1>(Opcode::ConditionRef, value);
+}
+
+void IREmitter::Reference(const Value& value) {
+    Inst(Opcode::Reference, value);
 }
 
 void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 4ae69b7886..bb3500c541 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -32,7 +32,9 @@ public:
     [[nodiscard]] U64 Imm64(s64 value) const;
     [[nodiscard]] F64 Imm64(f64 value) const;
 
-    void DummyReference(const Value& value);
+    U1 ConditionRef(const U1& value);
+    void Reference(const Value& value);
+
     void PhiMove(IR::Inst& phi, const Value& value);
 
     void Prologue();
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 267aebc612..3dfa5a8804 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -56,7 +56,8 @@ Inst::~Inst() {
 
 bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
-    case Opcode::DummyReference:
+    case Opcode::ConditionRef:
+    case Opcode::Reference:
     case Opcode::PhiMove:
     case Opcode::Prologue:
     case Opcode::Epilogue:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 6196b867dd..8a8d0d7593 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -6,7 +6,8 @@
 OPCODE(Phi,                                                 Opaque,                                                                                         )
 OPCODE(Identity,                                            Opaque,         Opaque,                                                                         )
 OPCODE(Void,                                                Void,                                                                                           )
-OPCODE(DummyReference,                                      Void,           Opaque,                                                                         )
+OPCODE(ConditionRef,                                        U1,             U1,                                                                             )
+OPCODE(Reference,                                           Void,           Opaque,                                                                         )
 OPCODE(PhiMove,                                             Void,           Opaque,         Opaque,                                                         )
 
 // Special operations
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 83554a9539..ebe5c2654e 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -703,8 +703,7 @@ private:
 
                 // Implement if header block
                 IR::IREmitter ir{*current_block};
-                const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
-                ir.DummyReference(cond);
+                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
 
                 const size_t if_node_index{syntax_list.size()};
                 syntax_list.emplace_back();
@@ -754,8 +753,7 @@ private:
 
                 // The continue block is located at the end of the loop
                 IR::IREmitter ir{*continue_block};
-                const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
-                ir.DummyReference(cond);
+                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
 
                 IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
                 loop_header_block->AddBranch(body_block);
@@ -791,8 +789,7 @@ private:
                 IR::Block* const skip_block{MergeBlock(parent, stmt)};
 
                 IR::IREmitter ir{*current_block};
-                const IR::U1 cond{VisitExpr(ir, *stmt.cond)};
-                ir.DummyReference(cond);
+                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
                 current_block->AddBranch(break_block);
                 current_block->AddBranch(skip_block);
                 current_block = skip_block;
-- 
cgit v1.2.3-70-g09d2