From 2d48a7b4d0666ad16d03a22d85712617a0849046 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 9 Jan 2021 03:30:07 -0300
Subject: shader: Initial recompiler work

---
 .../frontend/maxwell/translate/impl/impl.cpp       | 79 ++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 0000000000..7bc7ce9f29
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,79 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+IR::U32 TranslatorVisitor::X(IR::Reg reg) {
+    return ir.GetReg(reg);
+}
+
+void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
+    ir.SetReg(dest_reg, value);
+}
+
+IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 14, s64> offset;
+        BitField<34, 5, u64> binding;
+    } const cbuf{insn};
+    if (cbuf.binding >= 18) {
+        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+    }
+    if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) {
+        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
+    }
+    const IR::U32 binding{ir.Imm32(static_cast<u32>(cbuf.binding))};
+    const IR::U32 byte_offset{ir.Imm32(static_cast<u32>(cbuf.offset) * 4)};
+    return ir.GetCbuf(binding, byte_offset);
+}
+
+IR::U32 TranslatorVisitor::GetImm(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 19, u64> value;
+        BitField<56, 1, u64> is_negative;
+    } const imm{insn};
+    const s32 positive_value{static_cast<s32>(imm.value)};
+    const s32 value{imm.is_negative != 0 ? -positive_value : positive_value};
+    return ir.Imm32(value);
+}
+
+void TranslatorVisitor::SetZFlag(const IR::U1& value) {
+    ir.SetZFlag(value);
+}
+
+void TranslatorVisitor::SetSFlag(const IR::U1& value) {
+    ir.SetSFlag(value);
+}
+
+void TranslatorVisitor::SetCFlag(const IR::U1& value) {
+    ir.SetCFlag(value);
+}
+
+void TranslatorVisitor::SetOFlag(const IR::U1& value) {
+    ir.SetOFlag(value);
+}
+
+void TranslatorVisitor::ResetZero() {
+    SetZFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetSFlag() {
+    SetSFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetCFlag() {
+    SetCFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetOFlag() {
+    SetOFlag(ir.Imm1(false));
+}
+
+} // namespace Shader::Maxwell
-- 
cgit v1.2.3-70-g09d2


From d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 3 Feb 2021 16:43:04 -0300
Subject: shader: Initial instruction support

---
 src/shader_recompiler/CMakeLists.txt               |  13 +-
 src/shader_recompiler/frontend/ir/basic_block.cpp  |   4 +-
 src/shader_recompiler/frontend/ir/basic_block.h    |   2 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 200 +++++++++++++++++++--
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  67 ++++++-
 .../frontend/ir/microinstruction.h                 |  12 +-
 src/shader_recompiler/frontend/ir/modifiers.h      |  28 +++
 src/shader_recompiler/frontend/ir/opcode.inc       | 139 +++++++++-----
 src/shader_recompiler/frontend/ir/pred.h           |  11 +-
 src/shader_recompiler/frontend/maxwell/program.cpp |   1 +
 .../maxwell/translate/impl/common_encoding.h       |  56 ++++++
 .../maxwell/translate/impl/floating_point_add.cpp  |  71 ++++++++
 .../impl/floating_point_fused_multiply_add.cpp     |  73 ++++++++
 .../translate/impl/floating_point_multiply.cpp     | 108 +++++++++++
 .../frontend/maxwell/translate/impl/impl.cpp       |  26 ++-
 .../frontend/maxwell/translate/impl/impl.h         |   9 +-
 .../maxwell/translate/impl/integer_add.cpp         | 106 +++++++++++
 .../maxwell/translate/impl/integer_scaled_add.cpp  |  73 ++++++++
 .../translate/impl/integer_set_predicate.cpp       |  99 ++++++++++
 .../maxwell/translate/impl/integer_shift_left.cpp  |  71 ++++++++
 .../translate/impl/integer_short_multiply_add.cpp  | 110 ++++++++++++
 .../maxwell/translate/impl/load_store_memory.cpp   | 149 ++++++++++++---
 .../maxwell/translate/impl/move_register.cpp       |  45 +++++
 .../translate/impl/move_special_register.cpp       | 114 ++++++++++++
 .../maxwell/translate/impl/not_implemented.cpp     | 149 +--------------
 .../maxwell/translate/impl/register_move.cpp       |  45 -----
 .../ir_opt/get_set_elimination_pass.cpp            |  87 ---------
 src/shader_recompiler/ir_opt/passes.h              |   1 -
 src/shader_recompiler/main.cpp                     |   3 +-
 29 files changed, 1494 insertions(+), 378 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/ir/modifiers.h
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
 delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp
 delete mode 100644 src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 36a61f21ac..f5dd4d29ea 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -39,18 +39,27 @@ add_executable(shader_recompiler
     frontend/maxwell/program.h
     frontend/maxwell/termination_code.cpp
     frontend/maxwell/termination_code.h
+    frontend/maxwell/translate/impl/common_encoding.h
+    frontend/maxwell/translate/impl/floating_point_add.cpp
     frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+    frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
     frontend/maxwell/translate/impl/floating_point_multi_function.cpp
+    frontend/maxwell/translate/impl/floating_point_multiply.cpp
     frontend/maxwell/translate/impl/impl.cpp
     frontend/maxwell/translate/impl/impl.h
+    frontend/maxwell/translate/impl/integer_add.cpp
+    frontend/maxwell/translate/impl/integer_scaled_add.cpp
+    frontend/maxwell/translate/impl/integer_set_predicate.cpp
+    frontend/maxwell/translate/impl/integer_shift_left.cpp
+    frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
     frontend/maxwell/translate/impl/load_store_attribute.cpp
     frontend/maxwell/translate/impl/load_store_memory.cpp
     frontend/maxwell/translate/impl/not_implemented.cpp
-    frontend/maxwell/translate/impl/register_move.cpp
+    frontend/maxwell/translate/impl/move_register.cpp
+    frontend/maxwell/translate/impl/move_special_register.cpp
     frontend/maxwell/translate/translate.cpp
     frontend/maxwell/translate/translate.h
     ir_opt/dead_code_elimination_pass.cpp
-    ir_opt/get_set_elimination_pass.cpp
     ir_opt/identity_removal_pass.cpp
     ir_opt/passes.h
     ir_opt/ssa_rewrite_pass.cpp
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index e795618fcf..249251dd0b 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
 }
 
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
-                                      std::initializer_list<Value> args) {
-    Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)};
+                                      std::initializer_list<Value> args, u64 flags) {
+    Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)};
     const auto result_it{instructions.insert(insertion_point, *inst)};
 
     if (inst->NumArgs() != args.size()) {
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index 4b6b80c4b2..ec4a41cb1a 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -39,7 +39,7 @@ public:
 
     /// Prepends a new instruction to this basic block before the insertion point.
     iterator PrependNewInst(iterator insertion_point, Opcode op,
-                            std::initializer_list<Value> args = {});
+                            std::initializer_list<Value> args = {}, u64 flags = 0);
 
     /// Adds a new immediate predecessor to the basic block.
     void AddImmediatePredecessor(IR::Block* immediate_predecessor);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 6450e4b2c0..87b253c9ac 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) {
     Inst(Opcode::SetAttribute, attribute, value);
 }
 
+U32 IREmitter::WorkgroupIdX() {
+    return Inst<U32>(Opcode::WorkgroupIdX);
+}
+
+U32 IREmitter::WorkgroupIdY() {
+    return Inst<U32>(Opcode::WorkgroupIdY);
+}
+
+U32 IREmitter::WorkgroupIdZ() {
+    return Inst<U32>(Opcode::WorkgroupIdZ);
+}
+
+U32 IREmitter::LocalInvocationIdX() {
+    return Inst<U32>(Opcode::LocalInvocationIdX);
+}
+
+U32 IREmitter::LocalInvocationIdY() {
+    return Inst<U32>(Opcode::LocalInvocationIdY);
+}
+
+U32 IREmitter::LocalInvocationIdZ() {
+    return Inst<U32>(Opcode::LocalInvocationIdZ);
+}
+
+U32 IREmitter::LoadGlobalU8(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobalU8, address);
+}
+
+U32 IREmitter::LoadGlobalS8(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobalS8, address);
+}
+
+U32 IREmitter::LoadGlobalU16(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobalU16, address);
+}
+
+U32 IREmitter::LoadGlobalS16(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobalS16, address);
+}
+
+U32 IREmitter::LoadGlobal32(const U64& address) {
+    return Inst<U32>(Opcode::LoadGlobal32, address);
+}
+
+Value IREmitter::LoadGlobal64(const U64& address) {
+    return Inst<Value>(Opcode::LoadGlobal64, address);
+}
+
+Value IREmitter::LoadGlobal128(const U64& address) {
+    return Inst<Value>(Opcode::LoadGlobal128, address);
+}
+
 void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
     Inst(Opcode::WriteGlobalU8, address, value);
 }
@@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
     return Inst<U1>(Opcode::GetOverflowFromOp, op);
 }
 
-U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) {
+U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) {
     if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPAdd16, a, b);
+        return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b);
     case Type::U32:
-        return Inst<U32>(Opcode::FPAdd32, a, b);
+        return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b);
     case Type::U64:
-        return Inst<U64>(Opcode::FPAdd64, a, b);
+        return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b);
     default:
         ThrowInvalidType(a.Type());
     }
@@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) {
 
 Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) {
     if (e1.Type() != e2.Type()) {
-        throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type());
+        throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
     }
     return Inst(Opcode::CompositeConstruct2, e1, e2);
 }
 
 Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) {
     if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
-        throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type());
+        throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
     }
     return Inst(Opcode::CompositeConstruct3, e1, e2, e3);
 }
@@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny&
 Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3,
                                     const UAny& e4) {
     if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
-        throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(),
-                              e4.Type());
+        throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
+                              e3.Type(), e4.Type());
     }
     return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4);
 }
@@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) {
     return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element)));
 }
 
+UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) {
+    if (true_value.Type() != false_value.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
+    }
+    switch (true_value.Type()) {
+    case Type::U8:
+        return Inst<UAny>(Opcode::Select8, condition, true_value, false_value);
+    case Type::U16:
+        return Inst<UAny>(Opcode::Select16, condition, true_value, false_value);
+    case Type::U32:
+        return Inst<UAny>(Opcode::Select32, condition, true_value, false_value);
+    case Type::U64:
+        return Inst<UAny>(Opcode::Select64, condition, true_value, false_value);
+    default:
+        throw InvalidArgument("Invalid type {}", true_value.Type());
+    }
+}
+
 U64 IREmitter::PackUint2x32(const Value& vector) {
     return Inst<U64>(Opcode::PackUint2x32, vector);
 }
@@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) {
     return Inst<Value>(Opcode::UnpackDouble2x32, value);
 }
 
-U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) {
+U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) {
     if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPMul16, a, b);
+        return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b);
     case Type::U32:
-        return Inst<U32>(Opcode::FPMul32, a, b);
+        return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b);
     case Type::U64:
-        return Inst<U64>(Opcode::FPMul64, a, b);
+        return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
+                           FpControl control) {
+    if (a.Type() != b.Type() || a.Type() != c.Type()) {
+        throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
+    }
+    switch (a.Type()) {
+    case Type::U16:
+        return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c);
+    case Type::U32:
+        return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c);
+    case Type::U64:
+        return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c);
     default:
         ThrowInvalidType(a.Type());
     }
@@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) {
     }
 }
 
+U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
+    if (a.Type() != b.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::IAdd32, a, b);
+    case Type::U64:
+        return Inst<U64>(Opcode::IAdd64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
+U32 IREmitter::IMul(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::IMul32, a, b);
+}
+
+U32 IREmitter::INeg(const U32& value) {
+    return Inst<U32>(Opcode::INeg32, value);
+}
+
+U32 IREmitter::IAbs(const U32& value) {
+    return Inst<U32>(Opcode::IAbs32, value);
+}
+
+U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) {
+    return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
+}
+
+U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) {
+    return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
+}
+
+U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) {
+    return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
+}
+
+U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::BitwiseAnd32, a, b);
+}
+
+U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::BitwiseOr32, a, b);
+}
+
+U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::BitwiseXor32, a, b);
+}
+
+U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
+                              const U32& count) {
+    return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count);
+}
+
+U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
+                               bool is_signed) {
+    return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset,
+                     count);
+}
+
+U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
+    return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
+}
+
+U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) {
+    return Inst<U1>(Opcode::IEqual, lhs, rhs);
+}
+
+U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
+    return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
+}
+
+U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
+    return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
+}
+
+U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
+    return Inst<U1>(Opcode::INotEqual, lhs, rhs);
+}
+
+U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
+    return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
+}
+
 U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
     return Inst<U1>(Opcode::LogicalOr, a, b);
 }
@@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
     return Inst<U1>(Opcode::LogicalAnd, a, b);
 }
 
+U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
+    return Inst<U1>(Opcode::LogicalXor, a, b);
+}
+
 U1 IREmitter::LogicalNot(const U1& value) {
     return Inst<U1>(Opcode::LogicalNot, value);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 1af79f41cb..7ff763ecf4 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -4,8 +4,12 @@
 
 #pragma once
 
+#include <cstring>
+#include <type_traits>
+
 #include "shader_recompiler/frontend/ir/attribute.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
 #include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
@@ -52,6 +56,22 @@ public:
     [[nodiscard]] U32 GetAttribute(IR::Attribute attribute);
     void SetAttribute(IR::Attribute attribute, const U32& value);
 
+    [[nodiscard]] U32 WorkgroupIdX();
+    [[nodiscard]] U32 WorkgroupIdY();
+    [[nodiscard]] U32 WorkgroupIdZ();
+
+    [[nodiscard]] U32 LocalInvocationIdX();
+    [[nodiscard]] U32 LocalInvocationIdY();
+    [[nodiscard]] U32 LocalInvocationIdZ();
+
+    [[nodiscard]] U32 LoadGlobalU8(const U64& address);
+    [[nodiscard]] U32 LoadGlobalS8(const U64& address);
+    [[nodiscard]] U32 LoadGlobalU16(const U64& address);
+    [[nodiscard]] U32 LoadGlobalS16(const U64& address);
+    [[nodiscard]] U32 LoadGlobal32(const U64& address);
+    [[nodiscard]] Value LoadGlobal64(const U64& address);
+    [[nodiscard]] Value LoadGlobal128(const U64& address);
+
     void WriteGlobalU8(const U64& address, const U32& value);
     void WriteGlobalS8(const U64& address, const U32& value);
     void WriteGlobalU16(const U64& address, const U32& value);
@@ -71,6 +91,8 @@ public:
                                            const UAny& e4);
     [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element);
 
+    [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value);
+
     [[nodiscard]] U64 PackUint2x32(const Value& vector);
     [[nodiscard]] Value UnpackUint2x32(const U64& value);
 
@@ -80,8 +102,10 @@ public:
     [[nodiscard]] U64 PackDouble2x32(const Value& vector);
     [[nodiscard]] Value UnpackDouble2x32(const U64& value);
 
-    [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b);
-    [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b);
+    [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
+    [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
+    [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
+                                  FpControl control = {});
 
     [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value);
     [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value);
@@ -100,8 +124,31 @@ public:
     [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value);
     [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
 
+    [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+    [[nodiscard]] U32 IMul(const U32& a, const U32& b);
+    [[nodiscard]] U32 INeg(const U32& value);
+    [[nodiscard]] U32 IAbs(const U32& value);
+    [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift);
+    [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift);
+    [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift);
+    [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
+    [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
+    [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
+    [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
+                                     const U32& count);
+    [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
+                                      bool is_signed);
+
+    [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs);
+    [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
+    [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
+    [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+
     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
+    [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalNot(const U1& value);
 
     [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value);
@@ -118,6 +165,22 @@ private:
         auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})};
         return T{Value{&*it}};
     }
+
+    template <typename T>
+    requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags {
+        Flags() = default;
+        Flags(T proxy_) : proxy{proxy_} {}
+
+        T proxy;
+    };
+
+    template <typename T = Value, typename FlagType, typename... Args>
+    T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
+        u64 raw_flags{};
+        std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
+        auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
+        return T{Value{&*it}};
+    }
 };
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 7f1ed6710c..61849695ac 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -5,7 +5,9 @@
 #pragma once
 
 #include <array>
+#include <cstring>
 #include <span>
+#include <type_traits>
 #include <vector>
 
 #include <boost/intrusive/list.hpp>
@@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4;
 
 class Inst : public boost::intrusive::list_base_hook<> {
 public:
-    explicit Inst(Opcode op_) noexcept : op(op_) {}
+    explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {}
 
     /// Get the number of uses this instruction has.
     [[nodiscard]] int UseCount() const noexcept {
@@ -73,6 +75,14 @@ public:
 
     void ReplaceUsesWith(Value replacement);
 
+    template <typename FlagsType>
+    requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>)
+        [[nodiscard]] FlagsType Flags() const noexcept {
+        FlagsType ret;
+        std::memcpy(&ret, &flags, sizeof(ret));
+        return ret;
+    }
+
 private:
     void Use(const Value& value);
     void UndoUse(const Value& value);
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
new file mode 100644
index 0000000000..28bb9e798c
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Shader::IR {
+
+enum class FmzMode {
+    None, // Denorms are not flushed, NAN is propagated (nouveau)
+    FTZ,  // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
+    FMZ,  // Flush denorms to zero, x * 0 == 0 (D3D9)
+};
+
+enum class FpRounding {
+    RN, // Round to nearest even
+    RM, // Round towards negative infinity
+    RP, // Round towards positive infinity
+    RZ, // Round towards zero
+};
+
+struct FpControl {
+    bool no_contraction{false};
+    FpRounding rounding : 8 = FpRounding::RN;
+    FmzMode fmz_mode : 8 = FmzMode::FTZ;
+};
+static_assert(sizeof(FpControl) <= sizeof(u64));
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 40759e96ab..4ecb5e936d 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -35,6 +35,12 @@ OPCODE(SetZFlag,                                            Void,           U1,
 OPCODE(SetSFlag,                                            Void,           U1,                                                             )
 OPCODE(SetCFlag,                                            Void,           U1,                                                             )
 OPCODE(SetOFlag,                                            Void,           U1,                                                             )
+OPCODE(WorkgroupIdX,                                        U32,                                                                            )
+OPCODE(WorkgroupIdY,                                        U32,                                                                            )
+OPCODE(WorkgroupIdZ,                                        U32,                                                                            )
+OPCODE(LocalInvocationIdX,                                  U32,                                                                            )
+OPCODE(LocalInvocationIdY,                                  U32,                                                                            )
+OPCODE(LocalInvocationIdZ,                                  U32,                                                                            )
 
 // Undefined
 OPCODE(Undef1,                                              U1,                                                                             )
@@ -44,6 +50,13 @@ OPCODE(Undef32,                                             U32,
 OPCODE(Undef64,                                             U64,                                                                            )
 
 // Memory operations
+OPCODE(LoadGlobalU8,                                        U32,            U64,                                                            )
+OPCODE(LoadGlobalS8,                                        U32,            U64,                                                            )
+OPCODE(LoadGlobalU16,                                       U32,            U64,                                                            )
+OPCODE(LoadGlobalS16,                                       U32,            U64,                                                            )
+OPCODE(LoadGlobal32,                                        U32,            U64,                                                            )
+OPCODE(LoadGlobal64,                                        Opaque,         U64,                                                            )
+OPCODE(LoadGlobal128,                                       Opaque,         U64,                                                            )
 OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                            )
 OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                            )
 OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                            )
@@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3,                                 Opaque,         Opaq
 OPCODE(CompositeConstruct4,                                 Opaque,         Opaque,         Opaque,         Opaque,         Opaque,         )
 OPCODE(CompositeExtract,                                    Opaque,         Opaque,         U32,                                            )
 
+// Select operations
+OPCODE(Select8,                                             U8,             U1,             U8,             U8,                             )
+OPCODE(Select16,                                            U16,            U1,             U16,            U16,                            )
+OPCODE(Select32,                                            U32,            U1,             U32,            U32,                            )
+OPCODE(Select64,                                            U64,            U1,             U64,            U64,                            )
+
 // Bitwise conversions
 OPCODE(PackUint2x32,                                        U64,            Opaque,                                                         )
 OPCODE(UnpackUint2x32,                                      Opaque,         U64,                                                            )
@@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp,                                   U1,             Opaq
 OPCODE(GetZSCOFromOp,                                       ZSCO,           Opaque,                                                         )
 
 // Floating-point operations
-OPCODE(FPAbs16,                                             U16,            U16                                                             )
-OPCODE(FPAbs32,                                             U32,            U32                                                             )
-OPCODE(FPAbs64,                                             U64,            U64                                                             )
-OPCODE(FPAdd16,                                             U16,            U16,            U16                                             )
-OPCODE(FPAdd32,                                             U32,            U32,            U32                                             )
-OPCODE(FPAdd64,                                             U64,            U64,            U64                                             )
-OPCODE(FPFma16,                                             U16,            U16,            U16                                             )
-OPCODE(FPFma32,                                             U32,            U32,            U32                                             )
-OPCODE(FPFma64,                                             U64,            U64,            U64                                             )
-OPCODE(FPMax32,                                             U32,            U32,            U32                                             )
-OPCODE(FPMax64,                                             U64,            U64,            U64                                             )
-OPCODE(FPMin32,                                             U32,            U32,            U32                                             )
-OPCODE(FPMin64,                                             U64,            U64,            U64                                             )
-OPCODE(FPMul16,                                             U16,            U16,            U16                                             )
-OPCODE(FPMul32,                                             U32,            U32,            U32                                             )
-OPCODE(FPMul64,                                             U64,            U64,            U64                                             )
-OPCODE(FPNeg16,                                             U16,            U16                                                             )
-OPCODE(FPNeg32,                                             U32,            U32                                                             )
-OPCODE(FPNeg64,                                             U64,            U64                                                             )
-OPCODE(FPRecip32,                                           U32,            U32                                                             )
-OPCODE(FPRecip64,                                           U64,            U64                                                             )
-OPCODE(FPRecipSqrt32,                                       U32,            U32                                                             )
-OPCODE(FPRecipSqrt64,                                       U64,            U64                                                             )
-OPCODE(FPSqrt,                                              U32,            U32                                                             )
-OPCODE(FPSin,                                               U32,            U32                                                             )
-OPCODE(FPSinNotReduced,                                     U32,            U32                                                             )
-OPCODE(FPExp2,                                              U32,            U32                                                             )
-OPCODE(FPExp2NotReduced,                                    U32,            U32                                                             )
-OPCODE(FPCos,                                               U32,            U32                                                             )
-OPCODE(FPCosNotReduced,                                     U32,            U32                                                             )
-OPCODE(FPLog2,                                              U32,            U32                                                             )
-OPCODE(FPSaturate16,                                        U16,            U16                                                             )
-OPCODE(FPSaturate32,                                        U32,            U32                                                             )
-OPCODE(FPSaturate64,                                        U64,            U64                                                             )
-OPCODE(FPRoundEven16,                                       U16,            U16                                                             )
-OPCODE(FPRoundEven32,                                       U32,            U32                                                             )
-OPCODE(FPRoundEven64,                                       U64,            U64                                                             )
-OPCODE(FPFloor16,                                           U16,            U16                                                             )
-OPCODE(FPFloor32,                                           U32,            U32                                                             )
-OPCODE(FPFloor64,                                           U64,            U64                                                             )
-OPCODE(FPCeil16,                                            U16,            U16                                                             )
-OPCODE(FPCeil32,                                            U32,            U32                                                             )
-OPCODE(FPCeil64,                                            U64,            U64                                                             )
-OPCODE(FPTrunc16,                                           U16,            U16                                                             )
-OPCODE(FPTrunc32,                                           U32,            U32                                                             )
-OPCODE(FPTrunc64,                                           U64,            U64                                                             )
+OPCODE(FPAbs16,                                             U16,            U16,                                                            )
+OPCODE(FPAbs32,                                             U32,            U32,                                                            )
+OPCODE(FPAbs64,                                             U64,            U64,                                                            )
+OPCODE(FPAdd16,                                             U16,            U16,            U16,                                            )
+OPCODE(FPAdd32,                                             U32,            U32,            U32,                                            )
+OPCODE(FPAdd64,                                             U64,            U64,            U64,                                            )
+OPCODE(FPFma16,                                             U16,            U16,            U16,            U16,                            )
+OPCODE(FPFma32,                                             U32,            U32,            U32,            U32,                            )
+OPCODE(FPFma64,                                             U64,            U64,            U64,            U64,                            )
+OPCODE(FPMax32,                                             U32,            U32,            U32,                                            )
+OPCODE(FPMax64,                                             U64,            U64,            U64,                                            )
+OPCODE(FPMin32,                                             U32,            U32,            U32,                                            )
+OPCODE(FPMin64,                                             U64,            U64,            U64,                                            )
+OPCODE(FPMul16,                                             U16,            U16,            U16,                                            )
+OPCODE(FPMul32,                                             U32,            U32,            U32,                                            )
+OPCODE(FPMul64,                                             U64,            U64,            U64,                                            )
+OPCODE(FPNeg16,                                             U16,            U16,                                                            )
+OPCODE(FPNeg32,                                             U32,            U32,                                                            )
+OPCODE(FPNeg64,                                             U64,            U64,                                                            )
+OPCODE(FPRecip32,                                           U32,            U32,                                                            )
+OPCODE(FPRecip64,                                           U64,            U64,                                                            )
+OPCODE(FPRecipSqrt32,                                       U32,            U32,                                                            )
+OPCODE(FPRecipSqrt64,                                       U64,            U64,                                                            )
+OPCODE(FPSqrt,                                              U32,            U32,                                                            )
+OPCODE(FPSin,                                               U32,            U32,                                                            )
+OPCODE(FPSinNotReduced,                                     U32,            U32,                                                            )
+OPCODE(FPExp2,                                              U32,            U32,                                                            )
+OPCODE(FPExp2NotReduced,                                    U32,            U32,                                                            )
+OPCODE(FPCos,                                               U32,            U32,                                                            )
+OPCODE(FPCosNotReduced,                                     U32,            U32,                                                            )
+OPCODE(FPLog2,                                              U32,            U32,                                                            )
+OPCODE(FPSaturate16,                                        U16,            U16,                                                            )
+OPCODE(FPSaturate32,                                        U32,            U32,                                                            )
+OPCODE(FPSaturate64,                                        U64,            U64,                                                            )
+OPCODE(FPRoundEven16,                                       U16,            U16,                                                            )
+OPCODE(FPRoundEven32,                                       U32,            U32,                                                            )
+OPCODE(FPRoundEven64,                                       U64,            U64,                                                            )
+OPCODE(FPFloor16,                                           U16,            U16,                                                            )
+OPCODE(FPFloor32,                                           U32,            U32,                                                            )
+OPCODE(FPFloor64,                                           U64,            U64,                                                            )
+OPCODE(FPCeil16,                                            U16,            U16,                                                            )
+OPCODE(FPCeil32,                                            U32,            U32,                                                            )
+OPCODE(FPCeil64,                                            U64,            U64,                                                            )
+OPCODE(FPTrunc16,                                           U16,            U16,                                                            )
+OPCODE(FPTrunc32,                                           U32,            U32,                                                            )
+OPCODE(FPTrunc64,                                           U64,            U64,                                                            )
+
+// Integer operations
+OPCODE(IAdd32,                                              U32,            U32,            U32,                                            )
+OPCODE(IAdd64,                                              U64,            U64,            U64,                                            )
+OPCODE(IMul32,                                              U32,            U32,            U32,                                            )
+OPCODE(INeg32,                                              U32,            U32,                                                            )
+OPCODE(IAbs32,                                              U32,            U32,                                                            )
+OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                            )
+OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                            )
+OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                            )
+OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                            )
+OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                            )
+OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                            )
+OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,            )
+OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                            )
+OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                            )
+
+OPCODE(SLessThan,                                           U1,             U32,            U32,                                            )
+OPCODE(ULessThan,                                           U1,             U32,            U32,                                            )
+OPCODE(IEqual,                                              U1,             U32,            U32,                                            )
+OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                            )
+OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                            )
+OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                            )
+OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                            )
+OPCODE(INotEqual,                                           U1,             U32,            U32,                                            )
+OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                            )
+OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                            )
 
 // Logical operations
 OPCODE(LogicalOr,                                           U1,             U1,             U1,                                             )
 OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                             )
+OPCODE(LogicalXor,                                          U1,             U1,             U1,                                             )
 OPCODE(LogicalNot,                                          U1,             U1,                                                             )
 
 // Conversion operations
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
index daf23193f2..c6f2f82bfb 100644
--- a/src/shader_recompiler/frontend/ir/pred.h
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -8,7 +8,16 @@
 
 namespace Shader::IR {
 
-enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT };
+// Maxwell predicate registers. Backed by u64 so the enum can be used directly
+// as the storage type of a BitField when decoding instruction encodings.
+enum class Pred : u64 {
+    P0,
+    P1,
+    P2,
+    P3,
+    P4,
+    P5,
+    P6,
+    PT, // NOTE(review): presumably the always-true predicate (NUM_USER_PREDS is 6) - confirm
+};
 
 constexpr size_t NUM_USER_PREDS = 6;
 constexpr size_t NUM_PREDS = 7;
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 49d1f4bfb7..bd1f96c079 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) {
         Optimization::Invoke(Optimization::IdentityRemovalPass, function);
         // Optimization::Invoke(Optimization::VerificationPass, function);
     }
+    //*/
 }
 
 std::string DumpProgram(const Program& program) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 0000000000..3da37a2bb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,56 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Maxwell {
+
+// Floating-point rounding mode as encoded in Maxwell instruction words.
+enum class FpRounding : u64 {
+    RN, // Round to nearest even
+    RM, // Round towards minus infinity
+    RP, // Round towards plus infinity
+    RZ, // Round towards zero
+};
+
+// Denormal flush mode as encoded in Maxwell instruction words.
+enum class FmzMode : u64 {
+    None,
+    FTZ,
+    FMZ,
+    INVALIDFMZ3, // Encoding value 3 is not a valid mode
+};
+
+// Translates the hardware rounding encoding into the IR representation.
+// Throws NotImplementedException for an out-of-range value.
+inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
+    switch (fp_rounding) {
+    case FpRounding::RN:
+        return IR::FpRounding::RN;
+    case FpRounding::RM:
+        return IR::FpRounding::RM;
+    case FpRounding::RP:
+        return IR::FpRounding::RP;
+    case FpRounding::RZ:
+        return IR::FpRounding::RZ;
+    }
+    throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
+}
+
+// Translates the hardware FMZ encoding into the IR representation.
+// INVALIDFMZ3 (and any out-of-range value) throws NotImplementedException.
+inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
+    switch (fmz_mode) {
+    case FmzMode::None:
+        return IR::FmzMode::None;
+    case FmzMode::FTZ:
+        return IR::FmzMode::FTZ;
+    case FmzMode::FMZ:
+        return IR::FmzMode::FMZ;
+    case FmzMode::INVALIDFMZ3:
+        break;
+    }
+    throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 0000000000..d2c44b9ccd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+// Implementation of FADD once all modifiers are decoded. Reads the A operand
+// from the register encoded in bits 8-15 of the instruction and writes the
+// sum to the destination register encoded in bits 0-7.
+void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
+          const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a;
+    } const fadd{insn};
+
+    if (sat) {
+        throw NotImplementedException("FADD SAT");
+    }
+    if (cc) {
+        throw NotImplementedException("FADD CC");
+    }
+    // Absolute value and negation are resolved on each operand before the add
+    const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)};
+    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
+    const IR::FpControl control{
+        .no_contraction{true},
+        .rounding{CastFpRounding(fp_rounding)},
+        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
+    };
+    v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
+}
+
+// Decodes the FADD modifier bits shared by all operand variants and forwards
+// to the implementation above. The B operand is supplied by the caller
+// (register, constant buffer or immediate).
+void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
+    union {
+        u64 raw;
+        BitField<39, 2, FpRounding> fp_rounding;
+        BitField<44, 1, u64> ftz;
+        BitField<45, 1, u64> neg_b;
+        BitField<46, 1, u64> abs_a;
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> neg_a;
+        BitField<49, 1, u64> abs_b;
+        BitField<50, 1, u64> sat;
+    } const fadd{insn};
+
+    FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
+         fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
+}
+} // Anonymous namespace
+
+// FADD with the B operand sourced from a register (bits 20-27).
+void TranslatorVisitor::FADD_reg(u64 insn) {
+    FADD(*this, insn, GetReg20(insn));
+}
+
+// FADD with the B operand sourced from a constant buffer - not implemented yet.
+void TranslatorVisitor::FADD_cbuf(u64) {
+    throw NotImplementedException("FADD (cbuf)");
+}
+
+// FADD with an encoded immediate B operand - not implemented yet.
+void TranslatorVisitor::FADD_imm(u64) {
+    throw NotImplementedException("FADD (imm)");
+}
+
+// FADD with a 32-bit immediate B operand - not implemented yet.
+void TranslatorVisitor::FADD32I(u64) {
+    throw NotImplementedException("FADD32I");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 0000000000..30ca052ec5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,73 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Implementation of FFMA (fused multiply-add) once all modifiers are decoded:
+// dest = src_a * src_b + src_c, with optional per-operand negation.
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a,
+          bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a;
+    } const ffma{insn};
+
+    if (sat) {
+        throw NotImplementedException("FFMA SAT");
+    }
+    if (cc) {
+        throw NotImplementedException("FFMA CC");
+    }
+    // FFMA has no absolute-value modifiers, only negation
+    const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)};
+    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+    const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
+    const IR::FpControl fp_control{
+        .no_contraction{true},
+        .rounding{CastFpRounding(fp_rounding)},
+        .fmz_mode{CastFmzMode(fmz_mode)},
+    };
+    v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control));
+}
+
+// Decodes the FFMA modifier bits shared by all operand variants and forwards
+// to the implementation above. Note that neg_a is hardwired to false here:
+// this encoding has no A-negation bit.
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) {
+    union {
+        u64 raw;
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> neg_b;
+        BitField<49, 1, u64> neg_c;
+        BitField<50, 1, u64> sat;
+        BitField<51, 2, FpRounding> fp_rounding;
+        BitField<53, 2, FmzMode> fmz_mode;
+    } const ffma{insn};
+
+    FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
+         ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
+}
+} // Anonymous namespace
+
+// FFMA with B from a register (bits 20-27) and C from a register (bits 39-46).
+void TranslatorVisitor::FFMA_reg(u64 insn) {
+    FFMA(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+// FFMA with B from a register and C from a constant buffer - not implemented yet.
+void TranslatorVisitor::FFMA_rc(u64) {
+    throw NotImplementedException("FFMA (rc)");
+}
+
+// FFMA with B from a constant buffer and C from a register (bits 39-46).
+void TranslatorVisitor::FFMA_cr(u64 insn) {
+    FFMA(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+// FFMA with an encoded immediate B operand - not implemented yet.
+void TranslatorVisitor::FFMA_imm(u64) {
+    throw NotImplementedException("FFMA (imm)");
+}
+
+// FFMA with a 32-bit immediate B operand - not implemented yet.
+void TranslatorVisitor::FFMA32I(u64) {
+    throw NotImplementedException("FFMA32I");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 0000000000..743a1e2f0f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// Post-operand scale factor encoded in FMUL (bits 41-43). D* divides and M*
+// multiplies the A operand before the multiplication (see ScaleFactor below).
+enum class Scale : u64 {
+    None,
+    D2, // Divide by 2
+    D4, // Divide by 4
+    D8, // Divide by 8
+    M8, // Multiply by 8
+    M4, // Multiply by 4
+    M2, // Multiply by 2
+    INVALIDSCALE37, // Encoding value 7 is not a valid scale
+};
+
+// Maps a Scale encoding to the multiplier applied to the A operand.
+// Throws NotImplementedException for INVALIDSCALE37 or an out-of-range value.
+float ScaleFactor(Scale scale) {
+    switch (scale) {
+    case Scale::None:
+        return 1.0f;
+    case Scale::D2:
+        return 1.0f / 2.0f;
+    case Scale::D4:
+        return 1.0f / 4.0f;
+    case Scale::D8:
+        return 1.0f / 8.0f;
+    case Scale::M8:
+        return 8.0f;
+    case Scale::M4:
+        return 4.0f;
+    case Scale::M2:
+        return 2.0f;
+    case Scale::INVALIDSCALE37:
+        break;
+    }
+    throw NotImplementedException("Invalid FMUL scale {}", scale);
+}
+
+// Implementation of FMUL once all modifiers are decoded:
+// dest = (src_a * scale) * src_b. The A operand is read from the register
+// encoded in bits 8-15 and the result is written to bits 0-7.
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode,
+          FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a;
+    } const fmul{insn};
+
+    if (cc) {
+        throw NotImplementedException("FMUL CC");
+    }
+    if (sat) {
+        throw NotImplementedException("FMUL SAT");
+    }
+    IR::U32 op_a{v.X(fmul.src_a)};
+    if (scale != Scale::None) {
+        // Scaling is only handled under FTZ + round-to-nearest for now
+        if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
+            throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
+        }
+        // Pre-multiply A by the scale factor with default float controls
+        op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
+    }
+    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+    const IR::FpControl fp_control{
+        .no_contraction{true},
+        .rounding{CastFpRounding(fp_rounding)},
+        .fmz_mode{CastFmzMode(fmz_mode)},
+    };
+    v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control));
+}
+
+// Decodes the FMUL modifier bits shared by all operand variants and forwards
+// to the implementation above. The B operand is supplied by the caller.
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
+    union {
+        u64 raw;
+        BitField<39, 2, FpRounding> fp_rounding;
+        BitField<41, 3, Scale> scale;
+        BitField<44, 2, FmzMode> fmz;
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> neg_b;
+        BitField<50, 1, u64> sat;
+    } const fmul{insn};
+
+    FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
+         fmul.neg_b != 0);
+}
+} // Anonymous namespace
+
+// FMUL with the B operand sourced from a register (bits 20-27).
+void TranslatorVisitor::FMUL_reg(u64 insn) {
+    FMUL(*this, insn, GetReg20(insn));
+}
+
+// FMUL with the B operand sourced from a constant buffer - not implemented yet.
+void TranslatorVisitor::FMUL_cbuf(u64) {
+    throw NotImplementedException("FMUL (cbuf)");
+}
+
+// FMUL with an encoded immediate B operand - not implemented yet.
+void TranslatorVisitor::FMUL_imm(u64) {
+    throw NotImplementedException("FMUL (imm)");
+}
+
+// FMUL with a 32-bit immediate B operand - not implemented yet.
+void TranslatorVisitor::FMUL32I(u64) {
+    throw NotImplementedException("FMUL32I");
+}
+
+} // namespace Shader::Maxwell
\ No newline at end of file
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 7bc7ce9f29..548c7f611d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
     ir.SetReg(dest_reg, value);
 }
 
+// Reads the register operand encoded in bits 20-27 of the instruction.
+IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 8, IR::Reg> index;
+    } const reg{insn};
+    return X(reg.index);
+}
+
+// Reads the register operand encoded in bits 39-46 of the instruction.
+IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
+    union {
+        u64 raw;
+        BitField<39, 8, IR::Reg> index;
+    } const reg{insn};
+    return X(reg.index);
+}
+
 IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
     union {
         u64 raw;
@@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
     return ir.GetCbuf(binding, byte_offset);
 }
 
-IR::U32 TranslatorVisitor::GetImm(u64 insn) {
+IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
     union {
         u64 raw;
         BitField<20, 19, u64> value;
@@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) {
     return ir.Imm32(value);
 }
 
+// Reads the raw 32-bit immediate encoded in bits 20-51 of the instruction
+// (used by the *32I instruction variants).
+IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 32, u64> value;
+    } const imm{insn};
+    return ir.Imm32(static_cast<u32>(imm.value));
+}
+
 void TranslatorVisitor::SetZFlag(const IR::U1& value) {
     ir.SetZFlag(value);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 8be7d6ff1b..ef6d977fef 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -46,7 +46,7 @@ public:
     void DADD_reg(u64 insn);
     void DADD_cbuf(u64 insn);
     void DADD_imm(u64 insn);
-    void DEPBAR(u64 insn);
+    void DEPBAR();
     void DFMA_reg(u64 insn);
     void DFMA_rc(u64 insn);
     void DFMA_cr(u64 insn);
@@ -298,9 +298,14 @@ public:
     [[nodiscard]] IR::U32 X(IR::Reg reg);
     void X(IR::Reg dest_reg, const IR::U32& value);
 
+    [[nodiscard]] IR::U32 GetReg20(u64 insn);
+    [[nodiscard]] IR::U32 GetReg39(u64 insn);
+
     [[nodiscard]] IR::U32 GetCbuf(u64 insn);
 
-    [[nodiscard]] IR::U32 GetImm(u64 insn);
+    [[nodiscard]] IR::U32 GetImm20(u64 insn);
+
+    [[nodiscard]] IR::U32 GetImm32(u64 insn);
 
     void SetZFlag(const IR::U1& value);
     void SetSFlag(const IR::U1& value);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 0000000000..60f79b1606
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,106 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
+          bool cc) { // Shared IADD core: dest = (neg_a ? -A : A) + op_b, plus carry with .X or one with .PO
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a;
+    } const iadd{insn};
+
+    if (sat) {
+        throw NotImplementedException("IADD SAT"); // Saturated add not implemented yet
+    }
+    if (x && po) {
+        throw NotImplementedException("IADD X+PO"); // Carry-in combined with plus-one is unhandled
+    }
+    // Operand A is always read from here, negated if needed
+    IR::U32 op_a{v.X(iadd.src_a)};
+    if (neg_a) {
+        op_a = v.ir.INeg(op_a);
+    }
+    // Add both operands
+    IR::U32 result{v.ir.IAdd(op_a, op_b)};
+    if (x) {
+        const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; // .X: carry flag as extra addend
+        result = v.ir.IAdd(result, carry);
+    }
+    if (po) {
+        // .PO adds one to the result
+        result = v.ir.IAdd(result, v.ir.Imm32(1));
+    }
+    if (cc) {
+        // Store flags
+        // TODO: Does this grab the result pre-PO or after?
+        if (po) {
+            throw NotImplementedException("IADD CC+PO");
+        }
+        // TODO: How does CC behave when X is set?
+        if (x) {
+            throw NotImplementedException("IADD X+CC");
+        }
+        v.SetZFlag(v.ir.GetZeroFromOp(result));
+        v.SetSFlag(v.ir.GetSignFromOp(result));
+        v.SetCFlag(v.ir.GetCarryFromOp(result));
+        v.SetOFlag(v.ir.GetOverflowFromOp(result));
+    }
+    // Store result
+    v.X(iadd.dest_reg, result);
+}
+
+void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { // Decodes shared IADD fields, then calls the IADD core above
+    union {
+        u64 raw;
+        BitField<43, 1, u64> x;
+        BitField<47, 1, u64> cc;
+        BitField<48, 2, u64> three_for_po; // Aliases neg_b/neg_a; both bits set encodes .PO
+        BitField<48, 1, u64> neg_b;
+        BitField<49, 1, u64> neg_a;
+        BitField<50, 1, u64> sat;
+    } const iadd{insn};
+
+    const bool po{iadd.three_for_po == 3};
+    const bool neg_a{!po && iadd.neg_a != 0}; // .PO repurposes the negation bits, so suppress them
+    if (!po && iadd.neg_b != 0) {
+        op_b = v.ir.INeg(op_b);
+    }
+    IADD(v, insn, op_b, neg_a, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD_reg(u64) {
+    throw NotImplementedException("IADD (reg)");
+}
+
+void TranslatorVisitor::IADD_cbuf(u64 insn) {
+    IADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IADD_imm(u64) {
+    throw NotImplementedException("IADD (imm)");
+}
+
+void TranslatorVisitor::IADD32I(u64 insn) { // IADD with a full 32-bit immediate as operand B
+    union {
+        u64 raw;
+        BitField<52, 1, u64> cc;
+        BitField<53, 1, u64> x;
+        BitField<54, 1, u64> sat;
+        BitField<55, 2, u64> three_for_po; // Overlaps neg_a; both bits set encodes .PO
+        BitField<56, 1, u64> neg_a;
+    } const iadd32i{insn};
+
+    const bool po{iadd32i.three_for_po == 3};
+    const bool neg_a{!po && iadd32i.neg_a != 0}; // .PO repurposes the negation bit, so suppress it
+    IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 0000000000..f92c0bbd60
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,73 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { // ISCADD: dest = (op_a << scale) + op_b, with optional negation or .PO
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> op_a;
+        BitField<47, 1, u64> cc;
+        BitField<48, 2, u64> three_for_po; // Aliases neg_b/neg_a; both bits set encodes .PO
+        BitField<48, 1, u64> neg_b;
+        BitField<49, 1, u64> neg_a;
+        BitField<39, 5, u64> scale;
+    } const iscadd{insn};
+
+    const bool po{iscadd.three_for_po == 3};
+    IR::U32 op_a{v.X(iscadd.op_a)};
+    if (!po) {
+        // When PO is not present, the bits are interpreted as negation
+        if (iscadd.neg_a != 0) {
+            op_a = v.ir.INeg(op_a);
+        }
+        if (iscadd.neg_b != 0) {
+            op_b = v.ir.INeg(op_b);
+        }
+    }
+    // With the operands already processed, scale A
+    const IR::U32 scale{v.ir.Imm32(static_cast<u32>(iscadd.scale))};
+    const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
+
+    IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
+    if (po) {
+        // .PO adds one to the final result
+        result = v.ir.IAdd(result, v.ir.Imm32(1));
+    }
+    v.X(iscadd.dest_reg, result);
+
+    if (iscadd.cc != 0) {
+        throw NotImplementedException("ISCADD CC"); // Flag updates not implemented yet
+    }
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISCADD_reg(u64 insn) { // Register form: operand B comes from bits [27:20]
+    union {
+        u64 raw;
+        BitField<20, 8, IR::Reg> op_b;
+    } const iscadd{insn};
+
+    ISCADD(*this, insn, X(iscadd.op_b));
+}
+
+void TranslatorVisitor::ISCADD_cbuf(u64) {
+    throw NotImplementedException("ISCADD (cbuf)");
+}
+
+void TranslatorVisitor::ISCADD_imm(u64) {
+    throw NotImplementedException("ISCADD (imm)");
+}
+
+void TranslatorVisitor::ISCADD32I(u64) {
+    throw NotImplementedException("ISCADD32I");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 0000000000..76c6b52910
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,99 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class CompareOp : u64 {
+    F,  // Always false
+    LT, // Less than
+    EQ, // Equal
+    LE, // Less than or equal
+    GT, // Greater than
+    NE, // Not equal
+    GE, // Greater than or equal
+    T,  // Always true
+};
+
+enum class Bop : u64 {
+    AND,
+    OR,
+    XOR,
+};
+
+IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs,
+               bool is_signed) { // Maps an ISETP compare op onto the equivalent IR comparison
+    switch (op) {
+    case CompareOp::F:
+        return ir.Imm1(false); // Always-false comparison
+    case CompareOp::LT:
+        return ir.ILessThan(lhs, rhs, is_signed);
+    case CompareOp::EQ:
+        return ir.IEqual(lhs, rhs);
+    case CompareOp::LE:
+        return ir.ILessThanEqual(lhs, rhs, is_signed);
+    case CompareOp::GT:
+        return ir.IGreaterThan(lhs, rhs, is_signed);
+    case CompareOp::NE:
+        return ir.INotEqual(lhs, rhs);
+    case CompareOp::GE:
+        return ir.IGreaterThanEqual(lhs, rhs, is_signed);
+    case CompareOp::T:
+        return ir.Imm1(true); // Always-true comparison
+    }
+    throw NotImplementedException("Invalid ISETP compare op {}", op);
+}
+
+IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { // Joins the comparison with the auxiliary predicate via the encoded boolean op
+    switch (bop) {
+    case Bop::AND:
+        return ir.LogicalAnd(comparison, bop_pred);
+    case Bop::OR:
+        return ir.LogicalOr(comparison, bop_pred);
+    case Bop::XOR:
+        return ir.LogicalXor(comparison, bop_pred);
+    }
+    throw NotImplementedException("Invalid ISETP bop {}", bop);
+}
+
+void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { // Integer compare-and-set: writes two complementary predicates
+    union {
+        u64 raw;
+        BitField<0, 3, IR::Pred> dest_pred_b;
+        BitField<3, 3, IR::Pred> dest_pred_a;
+        BitField<8, 8, IR::Reg> src_reg_a;
+        BitField<39, 3, IR::Pred> bop_pred;
+        BitField<42, 1, u64> neg_bop_pred;
+        BitField<45, 2, Bop> bop;
+        BitField<48, 1, u64> is_signed;
+        BitField<49, 3, CompareOp> compare_op;
+    } const isetp{insn};
+
+    const Bop bop{isetp.bop};
+    const IR::U32 op_a{v.X(isetp.src_reg_a)};
+    const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)};
+    const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
+    const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)};
+    const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; // Second destination gets the negated comparison
+    v.ir.SetPred(isetp.dest_pred_a, result_a);
+    v.ir.SetPred(isetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISETP_reg(u64 insn) {
+    ISETP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISETP_cbuf(u64 insn) {
+    ISETP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISETP_imm(u64) {
+    throw NotImplementedException("ISETP_imm");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 0000000000..d4b417d14a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { // Logical shift left; shift-amount clamping depends on .W
+    union {
+        u64 insn;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_reg_a;
+        BitField<39, 1, u64> w;
+        BitField<43, 1, u64> x;
+        BitField<47, 1, u64> cc;
+    } const shl{insn};
+
+    if (shl.x != 0) {
+        throw NotImplementedException("SHL.X"); // Extended-precision shift not implemented
+    }
+    if (shl.cc != 0) {
+        throw NotImplementedException("SHL.CC"); // Flag updates not implemented
+    }
+    const IR::U32 base{v.X(shl.src_reg_a)};
+    IR::U32 result;
+    if (shl.w != 0) {
+        // When .W is set, the shift value is wrapped
+        // To emulate this we just have to clamp it ourselves.
+        const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
+        result = v.ir.ShiftLeftLogical(base, shift);
+    } else {
+        // When .W is not set, the shift value is clamped between 0 and 32.
+        // To emulate this we have to have in mind the special shift of 32, that evaluates as 0.
+        // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
+        //
+        // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
+        // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
+        //  or equal to the bit width of the components of Base."
+        //
+        // And on the GLASM specification it is also safe to evaluate out of bounds:
+        //
+        // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
+        // "The results of a shift operation ("<<") are undefined if the value of the second operand
+        //  is negative, or greater than or equal to the number of bits in the first operand."
+        //
+        // Emphasis on undefined results in contrast to undefined behavior.
+        //
+        const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
+        const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
+        result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); // Out-of-range shifts produce zero
+    }
+    v.X(shl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHL_reg(u64) {
+    throw NotImplementedException("SHL_reg");
+}
+
+void TranslatorVisitor::SHL_cbuf(u64) {
+    throw NotImplementedException("SHL_cbuf");
+}
+
+void TranslatorVisitor::SHL_imm(u64 insn) {
+    SHL(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 0000000000..70a7c76c55
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SelectMode : u64 {
+    Default,
+    CLO,
+    CHI,
+    CSFU,
+    CBCC,
+};
+
+enum class Half : u64 {
+    H0, // Least-significant bits (15:0)
+    H1, // Most-significant bits (31:16)
+};
+
+IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { // Extracts a 16-bit half, sign- or zero-extended to 32 bits
+    const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
+    return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
+}
+
+void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
+          SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { // dest = (a.half * b.half [<< 16]) + op_c
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_reg_a;
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> is_a_signed;
+        BitField<49, 1, u64> is_b_signed;
+        BitField<53, 1, Half> half_a;
+    } const xmad{insn};
+
+    if (x) {
+        throw NotImplementedException("XMAD X");
+    }
+    const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
+    const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
+
+    IR::U32 product{v.ir.IMul(op_a, op_b)};
+    if (psl) {
+        // .PSL shifts the product 16 bits
+        product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
+    }
+    const IR::U32 op_c{[&]() -> IR::U32 {
+        switch (select_mode) {
+        case SelectMode::Default:
+            return src_c;
+        case SelectMode::CLO:
+            return ExtractHalf(v, src_c, Half::H0, false);
+        case SelectMode::CHI:
+            return ExtractHalf(v, src_c, Half::H1, false);
+        case SelectMode::CBCC:
+            return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); // .CBCC: (src_b << 16) + src_c; src_c was previously ignored
+        case SelectMode::CSFU:
+            throw NotImplementedException("XMAD CSFU");
+        }
+        throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
+    }()};
+    IR::U32 result{v.ir.IAdd(product, op_c)};
+    if (mrg) {
+        // .MRG inserts src_b [15:0] into result's [31:16].
+        const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
+        result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
+    }
+    if (xmad.cc != 0) {
+        throw NotImplementedException("XMAD CC"); // Flag updates not implemented yet
+    }
+    // Store result
+    v.X(xmad.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::XMAD_reg(u64) {
+    throw NotImplementedException("XMAD (reg)");
+}
+
+void TranslatorVisitor::XMAD_rc(u64) {
+    throw NotImplementedException("XMAD (rc)");
+}
+
+void TranslatorVisitor::XMAD_cr(u64) {
+    throw NotImplementedException("XMAD (cr)");
+}
+
+void TranslatorVisitor::XMAD_imm(u64 insn) { // XMAD with a 16-bit immediate as operand B
+    union {
+        u64 raw;
+        BitField<20, 16, u64> src_b;
+        BitField<36, 1, u64> psl;
+        BitField<37, 1, u64> mrg;
+        BitField<38, 1, u64> x;
+        BitField<39, 8, IR::Reg> src_c;
+        BitField<50, 3, SelectMode> select_mode;
+    } const xmad{insn};
+
+    const IR::U32 src_b{ir.Imm32(static_cast<u32>(xmad.src_b))};
+    const IR::U32 src_c{X(xmad.src_c)};
+    XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0,
+         xmad.x != 0); // Half::H0: the immediate already occupies the low half
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index d8fd387cfb..c9669c6178 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -10,16 +10,35 @@
 
 namespace Shader::Maxwell {
 namespace {
+enum class LoadSize : u64 {
+    U8,  // Zero-extend
+    S8,  // Sign-extend
+    U16, // Zero-extend
+    S16, // Sign-extend
+    B32,
+    B64,
+    B128,
+    U128, // ???
+};
+
 enum class StoreSize : u64 {
-    U8,
-    S8,
-    U16,
-    S16,
+    U8,  // Zero-extend
+    S8,  // Sign-extend
+    U16, // Zero-extend
+    S16, // Sign-extend
     B32,
     B64,
     B128,
 };
 
+// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class LoadCache : u64 {
+    CA, // Cache at all levels, likely to be accessed again
+    CG, // Cache at global level (cache in L2 and below, not L1)
+    CI, // ???
+    CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
+};
+
 // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
 enum class StoreCache : u64 {
     WB, // Cache write-back all coherent levels
@@ -27,61 +46,137 @@ enum class StoreCache : u64 {
     CS, // Cache streaming, likely to be accessed once
     WT, // Cache write-through (to system memory)
 };
-} // Anonymous namespace
 
-void TranslatorVisitor::STG(u64 insn) {
-    // STG stores registers into global memory.
+IR::U64 Address(TranslatorVisitor& v, u64 insn) {
     union {
         u64 raw;
-        BitField<0, 8, IR::Reg> data_reg;
         BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 24, s64> addr_offset;
+        BitField<20, 24, u64> rz_addr_offset;
         BitField<45, 1, u64> e;
-        BitField<46, 2, StoreCache> cache;
-        BitField<48, 3, StoreSize> size;
-    } const stg{insn};
+    } const mem{insn};
 
     const IR::U64 address{[&]() -> IR::U64 {
-        if (stg.e == 0) {
-            // STG without .E uses a 32-bit pointer, zero-extend it
-            return ir.ConvertU(64, X(stg.addr_reg));
+        if (mem.e == 0) {
+            // LDG/STG without .E uses a 32-bit pointer, zero-extend it
+            return v.ir.ConvertU(64, v.X(mem.addr_reg));
         }
-        if (!IR::IsAligned(stg.addr_reg, 2)) {
+        if (!IR::IsAligned(mem.addr_reg, 2)) {
             throw NotImplementedException("Unaligned address register");
         }
-        // Pack two registers to build the 32-bit address
-        return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1)));
+        // Pack two registers to build the 64-bit address
+        return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
+    }()};
+    const u64 addr_offset{[&]() -> u64 {
+        if (mem.addr_reg == IR::Reg::RZ) {
+            // When RZ is used, the address is an absolute address
+            return static_cast<u64>(mem.rz_addr_offset.Value());
+        } else {
+            return static_cast<u64>(mem.addr_offset.Value());
+        }
     }()};
+    // Apply the offset
+    return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDG(u64 insn) {
+    // LDG loads global memory into registers
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<46, 2, LoadCache> cache; // Cache hint, currently unused
+        BitField<48, 3, LoadSize> size;
+    } const ldg{insn};
+
+    // Pointer to load data from
+    const IR::U64 address{Address(*this, insn)};
+    const IR::Reg dest_reg{ldg.dest_reg};
+    switch (ldg.size) {
+    case LoadSize::U8:
+        X(dest_reg, ir.LoadGlobalU8(address));
+        break;
+    case LoadSize::S8:
+        X(dest_reg, ir.LoadGlobalS8(address));
+        break;
+    case LoadSize::U16:
+        X(dest_reg, ir.LoadGlobalU16(address));
+        break;
+    case LoadSize::S16:
+        X(dest_reg, ir.LoadGlobalS16(address));
+        break;
+    case LoadSize::B32:
+        X(dest_reg, ir.LoadGlobal32(address));
+        break;
+    case LoadSize::B64: {
+        if (!IR::IsAligned(dest_reg, 2)) {
+            throw NotImplementedException("Unaligned data registers");
+        }
+        const IR::Value vector{ir.LoadGlobal64(address)};
+        for (int i = 0; i < 2; ++i) {
+            X(dest_reg + i, ir.CompositeExtract(vector, i)); // Spread elements over consecutive registers
+        }
+        break;
+    }
+    case LoadSize::B128: {
+        if (!IR::IsAligned(dest_reg, 4)) {
+            throw NotImplementedException("Unaligned data registers");
+        }
+        const IR::Value vector{ir.LoadGlobal128(address)};
+        for (int i = 0; i < 4; ++i) {
+            X(dest_reg + i, ir.CompositeExtract(vector, i)); // Spread elements over consecutive registers
+        }
+        break;
+    }
+    case LoadSize::U128:
+        throw NotImplementedException("LDG U.128");
+    default:
+        throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
+    }
+}
+
+void TranslatorVisitor::STG(u64 insn) {
+    // STG stores registers into global memory.
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> data_reg;
+        BitField<46, 2, StoreCache> cache;
+        BitField<48, 3, StoreSize> size;
+    } const stg{insn};
 
+    // Pointer to store data into
+    const IR::U64 address{Address(*this, insn)};
+    const IR::Reg data_reg{stg.data_reg};
     switch (stg.size) {
     case StoreSize::U8:
-        ir.WriteGlobalU8(address, X(stg.data_reg));
+        ir.WriteGlobalU8(address, X(data_reg));
         break;
     case StoreSize::S8:
-        ir.WriteGlobalS8(address, X(stg.data_reg));
+        ir.WriteGlobalS8(address, X(data_reg));
         break;
     case StoreSize::U16:
-        ir.WriteGlobalU16(address, X(stg.data_reg));
+        ir.WriteGlobalU16(address, X(data_reg));
         break;
     case StoreSize::S16:
-        ir.WriteGlobalS16(address, X(stg.data_reg));
+        ir.WriteGlobalS16(address, X(data_reg));
         break;
     case StoreSize::B32:
-        ir.WriteGlobal32(address, X(stg.data_reg));
+        ir.WriteGlobal32(address, X(data_reg));
         break;
     case StoreSize::B64: {
-        if (!IR::IsAligned(stg.data_reg, 2)) {
+        if (!IR::IsAligned(data_reg, 2)) {
             throw NotImplementedException("Unaligned data registers");
         }
-        const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))};
+        const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
         ir.WriteGlobal64(address, vector);
         break;
     }
     case StoreSize::B128:
-        if (!IR::IsAligned(stg.data_reg, 4)) {
+        if (!IR::IsAligned(data_reg, 4)) {
             throw NotImplementedException("Unaligned data registers");
         }
-        const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1),
-                                                     X(stg.data_reg + 2), X(stg.data_reg + 3))};
+        const IR::Value vector{
+            ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
         ir.WriteGlobal128(address, vector);
         break;
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 0000000000..1711d3f48a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,45 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+union MOV { // Field layout shared by all MOV variants
+    u64 raw;
+    BitField<0, 8, IR::Reg> dest_reg;
+    BitField<20, 8, IR::Reg> src_reg; // Only meaningful for the register variant
+    BitField<39, 4, u64> mask;
+};
+
+void CheckMask(MOV mov) { // Only full-mask (0xf) moves are supported for now
+    if (mov.mask != 0xf) {
+        throw NotImplementedException("Non-full move mask");
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::MOV_reg(u64 insn) { // Register-to-register move
+    const MOV mov{insn};
+    CheckMask(mov);
+    X(mov.dest_reg, X(mov.src_reg));
+}
+
+void TranslatorVisitor::MOV_cbuf(u64 insn) { // Constant-buffer-to-register move
+    const MOV mov{insn};
+    CheckMask(mov);
+    X(mov.dest_reg, GetCbuf(insn));
+}
+
+void TranslatorVisitor::MOV_imm(u64 insn) { // Immediate-to-register move
+    const MOV mov{insn};
+    CheckMask(mov);
+    X(mov.dest_reg, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 0000000000..93cea302a2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,114 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SpecialRegister : u64 {
+    SR_LANEID = 0,
+    SR_VIRTCFG = 2,
+    SR_VIRTID = 3,
+    SR_PM0 = 4,
+    SR_PM1 = 5,
+    SR_PM2 = 6,
+    SR_PM3 = 7,
+    SR_PM4 = 8,
+    SR_PM5 = 9,
+    SR_PM6 = 10,
+    SR_PM7 = 11,
+    SR_ORDERING_TICKET = 15,
+    SR_PRIM_TYPE = 16,
+    SR_INVOCATION_ID = 17,
+    SR_Y_DIRECTION = 18,
+    SR_THREAD_KILL = 19,
+    SM_SHADER_TYPE = 20,
+    SR_DIRECTCBEWRITEADDRESSLOW = 21,
+    SR_DIRECTCBEWRITEADDRESSHIGH = 22,
+    SR_DIRECTCBEWRITEENABLE = 23,
+    SR_MACHINE_ID_0 = 24,
+    SR_MACHINE_ID_1 = 25,
+    SR_MACHINE_ID_2 = 26,
+    SR_MACHINE_ID_3 = 27,
+    SR_AFFINITY = 28,
+    SR_INVOCATION_INFO = 29,
+    SR_WSCALEFACTOR_XY = 30,
+    SR_WSCALEFACTOR_Z = 31,
+    SR_TID = 32,
+    SR_TID_X = 33,
+    SR_TID_Y = 34,
+    SR_TID_Z = 35,
+    SR_CTAID_X = 37,
+    SR_CTAID_Y = 38,
+    SR_CTAID_Z = 39,
+    SR_NTID = 49,
+    SR_CirQueueIncrMinusOne = 50,
+    SR_NLATC = 51,
+    SR_SWINLO = 57,
+    SR_SWINSZ = 58,
+    SR_SMEMSZ = 59,
+    SR_SMEMBANKS = 60,
+    SR_LWINLO = 61,
+    SR_LWINSZ = 62,
+    SR_LMEMLOSZ = 63,
+    SR_LMEMHIOFF = 64,
+    SR_EQMASK = 65,
+    SR_LTMASK = 66,
+    SR_LEMASK = 67,
+    SR_GTMASK = 68,
+    SR_GEMASK = 69,
+    SR_REGALLOC = 70,
+    SR_GLOBALERRORSTATUS = 73,
+    SR_WARPERRORSTATUS = 75,
+    SR_PM_HI0 = 81,
+    SR_PM_HI1 = 82,
+    SR_PM_HI2 = 83,
+    SR_PM_HI3 = 84,
+    SR_PM_HI4 = 85,
+    SR_PM_HI5 = 86,
+    SR_PM_HI6 = 87,
+    SR_PM_HI7 = 88,
+    SR_CLOCKLO = 89,
+    SR_CLOCKHI = 90,
+    SR_GLOBALTIMERLO = 91,
+    SR_GLOBALTIMERHI = 92,
+    SR_HWTASKID = 105,
+    SR_CIRCULARQUEUEENTRYINDEX = 106,
+    SR_CIRCULARQUEUEENTRYADDRESSLOW = 107,
+    SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108,
+};
+
+[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { // Only compute invocation/workgroup IDs are implemented so far
+    switch (special_register) {
+    case SpecialRegister::SR_TID_X:
+        return ir.LocalInvocationIdX();
+    case SpecialRegister::SR_TID_Y:
+        return ir.LocalInvocationIdY();
+    case SpecialRegister::SR_TID_Z:
+        return ir.LocalInvocationIdZ();
+    case SpecialRegister::SR_CTAID_X:
+        return ir.WorkgroupIdX();
+    case SpecialRegister::SR_CTAID_Y:
+        return ir.WorkgroupIdY();
+    case SpecialRegister::SR_CTAID_Z:
+        return ir.WorkgroupIdZ();
+    default:
+        throw NotImplementedException("S2R special register {}", special_register);
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::S2R(u64 insn) { // Moves a special register into a general-purpose register
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<20, 8, SpecialRegister> src_reg;
+    } const s2r{insn};
+
+    X(s2r.dest_reg, Read(ir, s2r.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 0f52696d10..d70399f6bf 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -7,21 +7,8 @@
 #include "shader_recompiler/frontend/maxwell/opcode.h"
 #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
 
-#include "shader_recompiler/ir_opt/passes.h"
-
 namespace Shader::Maxwell {
 
-[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) {
-    auto raw{IR::DumpBlock(block)};
-
-    Optimization::GetSetElimination(block);
-    Optimization::DeadCodeEliminationPass(block);
-    Optimization::IdentityRemovalPass(block);
-    auto dumped{IR::DumpBlock(block)};
-
-    fmt::print(stderr, "{}", dumped);
-}
-
 [[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
     throw NotImplementedException("Instruction {} is not implemented", opcode);
 }
@@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) {
     ThrowNotImplemented(Opcode::DADD_imm);
 }
 
-void TranslatorVisitor::DEPBAR(u64) {
-    ThrowNotImplemented(Opcode::DEPBAR);
+void TranslatorVisitor::DEPBAR() {
+    // NOTE(review): treated as a no-op; DEPBAR appears to be a scheduling/dependency hint — confirm no state is required
+}
 
 void TranslatorVisitor::DFMA_reg(u64) {
@@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) {
     ThrowNotImplemented(Opcode::F2F_imm);
 }
 
-void TranslatorVisitor::FADD_reg(u64) {
-    ThrowNotImplemented(Opcode::FADD_reg);
-}
-
-void TranslatorVisitor::FADD_cbuf(u64) {
-    ThrowNotImplemented(Opcode::FADD_cbuf);
-}
-
-void TranslatorVisitor::FADD_imm(u64) {
-    ThrowNotImplemented(Opcode::FADD_imm);
-}
-
-void TranslatorVisitor::FADD32I(u64) {
-    ThrowNotImplemented(Opcode::FADD32I);
-}
-
 void TranslatorVisitor::FCHK_reg(u64) {
     ThrowNotImplemented(Opcode::FCHK_reg);
 }
@@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) {
     ThrowNotImplemented(Opcode::FCMP_imm);
 }
 
-void TranslatorVisitor::FFMA_reg(u64) {
-    ThrowNotImplemented(Opcode::FFMA_reg);
-}
-
-void TranslatorVisitor::FFMA_rc(u64) {
-    ThrowNotImplemented(Opcode::FFMA_rc);
-}
-
-void TranslatorVisitor::FFMA_cr(u64) {
-    ThrowNotImplemented(Opcode::FFMA_cr);
-}
-
-void TranslatorVisitor::FFMA_imm(u64) {
-    ThrowNotImplemented(Opcode::FFMA_imm);
-}
-
-void TranslatorVisitor::FFMA32I(u64) {
-    ThrowNotImplemented(Opcode::FFMA32I);
-}
-
 void TranslatorVisitor::FLO_reg(u64) {
     ThrowNotImplemented(Opcode::FLO_reg);
 }
@@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) {
     ThrowNotImplemented(Opcode::FMNMX_imm);
 }
 
-void TranslatorVisitor::FMUL_reg(u64) {
-    ThrowNotImplemented(Opcode::FMUL_reg);
-}
-
-void TranslatorVisitor::FMUL_cbuf(u64) {
-    ThrowNotImplemented(Opcode::FMUL_cbuf);
-}
-
-void TranslatorVisitor::FMUL_imm(u64) {
-    ThrowNotImplemented(Opcode::FMUL_imm);
-}
-
-void TranslatorVisitor::FMUL32I(u64) {
-    ThrowNotImplemented(Opcode::FMUL32I);
-}
-
 void TranslatorVisitor::FSET_reg(u64) {
     ThrowNotImplemented(Opcode::FSET_reg);
 }
@@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) {
     ThrowNotImplemented(Opcode::I2I_imm);
 }
 
-void TranslatorVisitor::IADD_reg(u64) {
-    ThrowNotImplemented(Opcode::IADD_reg);
-}
-
-void TranslatorVisitor::IADD_cbuf(u64) {
-    ThrowNotImplemented(Opcode::IADD_cbuf);
-}
-
-void TranslatorVisitor::IADD_imm(u64) {
-    ThrowNotImplemented(Opcode::IADD_imm);
-}
-
 void TranslatorVisitor::IADD3_reg(u64) {
     ThrowNotImplemented(Opcode::IADD3_reg);
 }
@@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) {
     ThrowNotImplemented(Opcode::IADD3_imm);
 }
 
-void TranslatorVisitor::IADD32I(u64) {
-    ThrowNotImplemented(Opcode::IADD32I);
-}
-
 void TranslatorVisitor::ICMP_reg(u64) {
     ThrowNotImplemented(Opcode::ICMP_reg);
 }
@@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) {
     ThrowNotImplemented(Opcode::ISBERD);
 }
 
-void TranslatorVisitor::ISCADD_reg(u64) {
-    ThrowNotImplemented(Opcode::ISCADD_reg);
-}
-
-void TranslatorVisitor::ISCADD_cbuf(u64) {
-    ThrowNotImplemented(Opcode::ISCADD_cbuf);
-}
-
-void TranslatorVisitor::ISCADD_imm(u64) {
-    ThrowNotImplemented(Opcode::ISCADD_imm);
-}
-
-void TranslatorVisitor::ISCADD32I(u64) {
-    ThrowNotImplemented(Opcode::ISCADD32I);
-}
-
 void TranslatorVisitor::ISET_reg(u64) {
     ThrowNotImplemented(Opcode::ISET_reg);
 }
@@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) {
     ThrowNotImplemented(Opcode::ISET_imm);
 }
 
-void TranslatorVisitor::ISETP_reg(u64) {
-    ThrowNotImplemented(Opcode::ISETP_reg);
-}
-
-void TranslatorVisitor::ISETP_cbuf(u64) {
-    ThrowNotImplemented(Opcode::ISETP_cbuf);
-}
-
-void TranslatorVisitor::ISETP_imm(u64) {
-    ThrowNotImplemented(Opcode::ISETP_imm);
-}
-
 void TranslatorVisitor::JCAL(u64) {
     ThrowNotImplemented(Opcode::JCAL);
 }
@@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) {
     ThrowNotImplemented(Opcode::LDC);
 }
 
-void TranslatorVisitor::LDG(u64) {
-    ThrowNotImplemented(Opcode::LDG);
-}
-
 void TranslatorVisitor::LDL(u64) {
     ThrowNotImplemented(Opcode::LDL);
 }
@@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) {
     ThrowNotImplemented(Opcode::RTT);
 }
 
-void TranslatorVisitor::S2R(u64) {
-    ThrowNotImplemented(Opcode::S2R);
-}
-
 void TranslatorVisitor::SAM(u64) {
     ThrowNotImplemented(Opcode::SAM);
 }
@@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) {
     ThrowNotImplemented(Opcode::SHFL);
 }
 
-void TranslatorVisitor::SHL_reg(u64) {
-    ThrowNotImplemented(Opcode::SHL_reg);
-}
-
-void TranslatorVisitor::SHL_cbuf(u64) {
-    ThrowNotImplemented(Opcode::SHL_cbuf);
-}
-
-void TranslatorVisitor::SHL_imm(u64) {
-    ThrowNotImplemented(Opcode::SHL_imm);
-}
-
 void TranslatorVisitor::SHR_reg(u64) {
     ThrowNotImplemented(Opcode::SHR_reg);
 }
@@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) {
     ThrowNotImplemented(Opcode::VSHR);
 }
 
-void TranslatorVisitor::XMAD_reg(u64) {
-    ThrowNotImplemented(Opcode::XMAD_reg);
-}
-
-void TranslatorVisitor::XMAD_rc(u64) {
-    ThrowNotImplemented(Opcode::XMAD_rc);
-}
-
-void TranslatorVisitor::XMAD_cr(u64) {
-    ThrowNotImplemented(Opcode::XMAD_cr);
-}
-
-void TranslatorVisitor::XMAD_imm(u64) {
-    ThrowNotImplemented(Opcode::XMAD_imm);
-}
-
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp
deleted file mode 100644
index 7fa35ba3a2..0000000000
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "shader_recompiler/exception.h"
-#include "shader_recompiler/frontend/maxwell/opcode.h"
-#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
-
-namespace Shader::Maxwell {
-namespace {
-union MOV {
-    u64 raw;
-    BitField<0, 8, IR::Reg> dest_reg;
-    BitField<20, 8, IR::Reg> src_reg;
-    BitField<39, 4, u64> mask;
-};
-
-void CheckMask(MOV mov) {
-    if (mov.mask != 0xf) {
-        throw NotImplementedException("Non-full move mask");
-    }
-}
-} // Anonymous namespace
-
-void TranslatorVisitor::MOV_reg(u64 insn) {
-    const MOV mov{insn};
-    CheckMask(mov);
-    X(mov.dest_reg, X(mov.src_reg));
-}
-
-void TranslatorVisitor::MOV_cbuf(u64 insn) {
-    const MOV mov{insn};
-    CheckMask(mov);
-    X(mov.dest_reg, GetCbuf(insn));
-}
-
-void TranslatorVisitor::MOV_imm(u64 insn) {
-    const MOV mov{insn};
-    CheckMask(mov);
-    X(mov.dest_reg, GetImm(insn));
-}
-
-} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp b/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp
deleted file mode 100644
index 21b8526cd9..0000000000
--- a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-
-#include "shader_recompiler/frontend/ir/basic_block.h"
-#include "shader_recompiler/frontend/ir/microinstruction.h"
-#include "shader_recompiler/ir_opt/passes.h"
-
-namespace Shader::Optimization {
-namespace {
-using Iterator = IR::Block::iterator;
-
-enum class TrackingType {
-    Reg,
-};
-
-struct RegisterInfo {
-    IR::Value register_value;
-    TrackingType tracking_type;
-    Iterator last_set_instruction;
-    bool set_instruction_present = false;
-};
-
-void DoSet(IR::Block& block, RegisterInfo& info, IR::Value value, Iterator set_inst,
-           TrackingType tracking_type) {
-    if (info.set_instruction_present) {
-        info.last_set_instruction->Invalidate();
-        block.Instructions().erase(info.last_set_instruction);
-    }
-    info.register_value = value;
-    info.tracking_type = tracking_type;
-    info.set_instruction_present = true;
-    info.last_set_instruction = set_inst;
-}
-
-RegisterInfo Nothing(Iterator get_inst, TrackingType tracking_type) {
-    RegisterInfo info{};
-    info.register_value = IR::Value{&*get_inst};
-    info.tracking_type = tracking_type;
-    return info;
-}
-
-void DoGet(RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) {
-    if (info.register_value.IsEmpty()) {
-        info = Nothing(get_inst, tracking_type);
-        return;
-    }
-    if (info.tracking_type == tracking_type) {
-        get_inst->ReplaceUsesWith(info.register_value);
-        return;
-    }
-    info = Nothing(get_inst, tracking_type);
-}
-} // Anonymous namespace
-
-void GetSetElimination(IR::Block& block) {
-    std::array<RegisterInfo, 255> reg_info;
-
-    for (Iterator inst = block.begin(); inst != block.end(); ++inst) {
-        switch (inst->Opcode()) {
-        case IR::Opcode::GetRegister: {
-            const IR::Reg reg{inst->Arg(0).Reg()};
-            if (reg == IR::Reg::RZ) {
-                break;
-            }
-            const size_t index{static_cast<size_t>(reg)};
-            DoGet(reg_info.at(index), inst, TrackingType::Reg);
-            break;
-        }
-        case IR::Opcode::SetRegister: {
-            const IR::Reg reg{inst->Arg(0).Reg()};
-            if (reg == IR::Reg::RZ) {
-                break;
-            }
-            const size_t index{static_cast<size_t>(reg)};
-            DoSet(block, reg_info.at(index), inst->Arg(1), inst, TrackingType::Reg);
-            break;
-        }
-        default:
-            break;
-        }
-    }
-}
-
-} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 83f094d735..7ed4005ed1 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -17,7 +17,6 @@ void Invoke(Func&& func, IR::Function& function) {
 }
 
 void DeadCodeEliminationPass(IR::Block& block);
-void GetSetElimination(IR::Block& block);
 void IdentityRemovalPass(IR::Block& block);
 void SsaRewritePass(IR::Function& function);
 void VerificationPass(const IR::Block& block);
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index e3c9ad6e8f..4022c6fe2a 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -51,7 +51,8 @@ void RunDatabase() {
 int main() {
     // RunDatabase();
 
-    FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
+    // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
+    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
     auto cfg{std::make_unique<Flow::CFG>(env, 0)};
     // fmt::print(stdout, "{}\n", cfg->Dot());
 
-- 
cgit v1.2.3-70-g09d2


From be94ee88d227d0d3dbeabe9ade98bacd910c7a7e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 5 Feb 2021 19:19:36 -0300
Subject: shader: Make typed IR

---
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 275 ++++++++++++++-------
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  69 +++---
 src/shader_recompiler/frontend/ir/opcode.inc       | 200 ++++++++-------
 src/shader_recompiler/frontend/ir/type.cpp         |   4 +-
 src/shader_recompiler/frontend/ir/type.h           |  15 ++
 src/shader_recompiler/frontend/ir/value.cpp        |  28 +++
 src/shader_recompiler/frontend/ir/value.h          |  10 +
 .../maxwell/translate/impl/floating_point_add.cpp  |  12 +-
 .../impl/floating_point_conversion_integer.cpp     |  20 +-
 .../impl/floating_point_fused_multiply_add.cpp     |  16 +-
 .../impl/floating_point_multi_function.cpp         |   6 +-
 .../translate/impl/floating_point_multiply.cpp     |  13 +-
 .../frontend/maxwell/translate/impl/impl.cpp       |  20 ++
 .../frontend/maxwell/translate/impl/impl.h         |   6 +
 .../translate/impl/load_store_attribute.cpp        |  23 +-
 .../maxwell/translate/impl/load_store_memory.cpp   |   4 +-
 .../ir_opt/constant_propagation_pass.cpp           |  20 ++
 .../global_memory_to_storage_buffer_pass.cpp       |  21 +-
 src/shader_recompiler/main.cpp                     |   2 +-
 19 files changed, 495 insertions(+), 269 deletions(-)

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 1c5ae0109b..9d7dc034c9 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -32,16 +32,16 @@ U32 IREmitter::Imm32(s32 value) const {
     return U32{Value{static_cast<u32>(value)}};
 }
 
-U32 IREmitter::Imm32(f32 value) const {
-    return U32{Value{Common::BitCast<u32>(value)}};
+F32 IREmitter::Imm32(f32 value) const {
+    return F32{Value{value}};
 }
 
 U64 IREmitter::Imm64(u64 value) const {
     return U64{Value{value}};
 }
 
-U64 IREmitter::Imm64(f64 value) const {
-    return U64{Value{Common::BitCast<u64>(value)}};
+F64 IREmitter::Imm64(f64 value) const {
+    return F64{Value{value}};
 }
 
 void IREmitter::Branch(IR::Block* label) {
@@ -121,11 +121,11 @@ void IREmitter::SetOFlag(const U1& value) {
     Inst(Opcode::SetOFlag, value);
 }
 
-U32 IREmitter::GetAttribute(IR::Attribute attribute) {
-    return Inst<U32>(Opcode::GetAttribute, attribute);
+F32 IREmitter::GetAttribute(IR::Attribute attribute) {
+    return Inst<F32>(Opcode::GetAttribute, attribute);
 }
 
-void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) {
+void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
     Inst(Opcode::SetAttribute, attribute, value);
 }
 
@@ -225,50 +225,113 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
     return Inst<U1>(Opcode::GetOverflowFromOp, op);
 }
 
-U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) {
+F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
     if (a.Type() != a.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
-    case Type::U16:
-        return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b);
-    case Type::U32:
-        return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b);
-    case Type::U64:
-        return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b);
+    case Type::F16:
+        return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b);
+    case Type::F32:
+        return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b);
     default:
         ThrowInvalidType(a.Type());
     }
 }
 
-Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) {
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
     if (e1.Type() != e2.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
     }
-    return Inst(Opcode::CompositeConstruct2, e1, e2);
+    switch (e1.Type()) {
+    case Type::U32:
+        return Inst(Opcode::CompositeConstructU32x2, e1, e2);
+    case Type::F16:
+        return Inst(Opcode::CompositeConstructF16x2, e1, e2);
+    case Type::F32:
+        return Inst(Opcode::CompositeConstructF32x2, e1, e2);
+    case Type::F64:
+        return Inst(Opcode::CompositeConstructF64x2, e1, e2);
+    default:
+        ThrowInvalidType(e1.Type());
+    }
 }
 
-Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) {
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) {
     if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
         throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
     }
-    return Inst(Opcode::CompositeConstruct3, e1, e2, e3);
+    switch (e1.Type()) {
+    case Type::U32:
+        return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3);
+    case Type::F16:
+        return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3);
+    case Type::F32:
+        return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3);
+    case Type::F64:
+        return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3);
+    default:
+        ThrowInvalidType(e1.Type());
+    }
 }
 
-Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3,
-                                    const UAny& e4) {
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
+                                    const Value& e4) {
     if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
         throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
                               e3.Type(), e4.Type());
     }
-    return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4);
+    switch (e1.Type()) {
+    case Type::U32:
+        return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
+    case Type::F16:
+        return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4);
+    case Type::F32:
+        return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
+    case Type::F64:
+        return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
+    default:
+        ThrowInvalidType(e1.Type());
+    }
 }
 
-UAny IREmitter::CompositeExtract(const Value& vector, size_t element) {
-    if (element >= 4) {
-        throw InvalidArgument("Out of bounds element {}", element);
+Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
+    const auto read = [&](Opcode opcode, size_t limit) -> Value {
+        if (element >= limit) {
+            throw InvalidArgument("Out of bounds element {}", element);
+        }
+        return Inst(opcode, vector, Value{static_cast<u32>(element)});
+    };
+    switch (vector.Type()) {
+    case Type::U32x2:
+        return read(Opcode::CompositeExtractU32x2, 2);
+    case Type::U32x3:
+        return read(Opcode::CompositeExtractU32x3, 3);
+    case Type::U32x4:
+        return read(Opcode::CompositeExtractU32x4, 4);
+    case Type::F16x2:
+        return read(Opcode::CompositeExtractF16x2, 2);
+    case Type::F16x3:
+        return read(Opcode::CompositeExtractF16x3, 3);
+    case Type::F16x4:
+        return read(Opcode::CompositeExtractF16x4, 4);
+    case Type::F32x2:
+        return read(Opcode::CompositeExtractF32x2, 2);
+    case Type::F32x3:
+        return read(Opcode::CompositeExtractF32x3, 3);
+    case Type::F32x4:
+        return read(Opcode::CompositeExtractF32x4, 4);
+    case Type::F64x2:
+        return read(Opcode::CompositeExtractF64x2, 2);
+    case Type::F64x3:
+        return read(Opcode::CompositeExtractF64x3, 3);
+    case Type::F64x4:
+        return read(Opcode::CompositeExtractF64x4, 4);
+    default:
+        ThrowInvalidType(vector.Type());
     }
-    return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element)));
 }
 
 UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) {
@@ -289,6 +352,36 @@ UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny&
     }
 }
 
+template <>
+IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
+    return Inst<IR::U32>(Opcode::BitCastU32F32, value);
+}
+
+template <>
+IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
+    return Inst<IR::F32>(Opcode::BitCastF32U32, value);
+}
+
+template <>
+IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) {
+    return Inst<IR::U16>(Opcode::BitCastU16F16, value);
+}
+
+template <>
+IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) {
+    return Inst<IR::F16>(Opcode::BitCastF16U16, value);
+}
+
+template <>
+IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) {
+    return Inst<IR::U64>(Opcode::BitCastU64F64, value);
+}
+
+template <>
+IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) {
+    return Inst<IR::F64>(Opcode::BitCastF64U64, value);
+}
+
 U64 IREmitter::PackUint2x32(const Value& vector) {
     return Inst<U64>(Opcode::PackUint2x32, vector);
 }
@@ -305,75 +398,75 @@ Value IREmitter::UnpackFloat2x16(const U32& value) {
     return Inst<Value>(Opcode::UnpackFloat2x16, value);
 }
 
-U64 IREmitter::PackDouble2x32(const Value& vector) {
-    return Inst<U64>(Opcode::PackDouble2x32, vector);
+F64 IREmitter::PackDouble2x32(const Value& vector) {
+    return Inst<F64>(Opcode::PackDouble2x32, vector);
 }
 
-Value IREmitter::UnpackDouble2x32(const U64& value) {
+Value IREmitter::UnpackDouble2x32(const F64& value) {
     return Inst<Value>(Opcode::UnpackDouble2x32, value);
 }
 
-U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) {
+F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) {
     if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
-    case Type::U16:
-        return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b);
-    case Type::U32:
-        return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b);
-    case Type::U64:
-        return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b);
+    case Type::F16:
+        return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b);
+    case Type::F32:
+        return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b);
     default:
         ThrowInvalidType(a.Type());
     }
 }
 
-U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
+F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
                            FpControl control) {
     if (a.Type() != b.Type() || a.Type() != c.Type()) {
         throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
     }
     switch (a.Type()) {
-    case Type::U16:
-        return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c);
-    case Type::U32:
-        return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c);
-    case Type::U64:
-        return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c);
+    case Type::F16:
+        return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c);
+    case Type::F32:
+        return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c);
     default:
         ThrowInvalidType(a.Type());
     }
 }
 
-U16U32U64 IREmitter::FPAbs(const U16U32U64& value) {
+F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
     switch (value.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPAbs16, value);
+        return Inst<F16>(Opcode::FPAbs16, value);
     case Type::U32:
-        return Inst<U32>(Opcode::FPAbs32, value);
+        return Inst<F32>(Opcode::FPAbs32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPAbs64, value);
+        return Inst<F64>(Opcode::FPAbs64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-U16U32U64 IREmitter::FPNeg(const U16U32U64& value) {
+F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
     switch (value.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPNeg16, value);
+        return Inst<F16>(Opcode::FPNeg16, value);
     case Type::U32:
-        return Inst<U32>(Opcode::FPNeg32, value);
+        return Inst<F32>(Opcode::FPNeg32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPNeg64, value);
+        return Inst<F64>(Opcode::FPNeg64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) {
-    U16U32U64 result{value};
+F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
+    F16F32F64 result{value};
     if (abs) {
         result = FPAbs(value);
     }
@@ -383,108 +476,108 @@ U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) {
     return result;
 }
 
-U32 IREmitter::FPCosNotReduced(const U32& value) {
-    return Inst<U32>(Opcode::FPCosNotReduced, value);
+F32 IREmitter::FPCosNotReduced(const F32& value) {
+    return Inst<F32>(Opcode::FPCosNotReduced, value);
 }
 
-U32 IREmitter::FPExp2NotReduced(const U32& value) {
-    return Inst<U32>(Opcode::FPExp2NotReduced, value);
+F32 IREmitter::FPExp2NotReduced(const F32& value) {
+    return Inst<F32>(Opcode::FPExp2NotReduced, value);
 }
 
-U32 IREmitter::FPLog2(const U32& value) {
-    return Inst<U32>(Opcode::FPLog2, value);
+F32 IREmitter::FPLog2(const F32& value) {
+    return Inst<F32>(Opcode::FPLog2, value);
 }
 
-U32U64 IREmitter::FPRecip(const U32U64& value) {
+F32F64 IREmitter::FPRecip(const F32F64& value) {
     switch (value.Type()) {
     case Type::U32:
-        return Inst<U32>(Opcode::FPRecip32, value);
+        return Inst<F32>(Opcode::FPRecip32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPRecip64, value);
+        return Inst<F64>(Opcode::FPRecip64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-U32U64 IREmitter::FPRecipSqrt(const U32U64& value) {
+F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
     switch (value.Type()) {
     case Type::U32:
-        return Inst<U32>(Opcode::FPRecipSqrt32, value);
+        return Inst<F32>(Opcode::FPRecipSqrt32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPRecipSqrt64, value);
+        return Inst<F64>(Opcode::FPRecipSqrt64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-U32 IREmitter::FPSinNotReduced(const U32& value) {
-    return Inst<U32>(Opcode::FPSinNotReduced, value);
+F32 IREmitter::FPSinNotReduced(const F32& value) {
+    return Inst<F32>(Opcode::FPSinNotReduced, value);
 }
 
-U32 IREmitter::FPSqrt(const U32& value) {
-    return Inst<U32>(Opcode::FPSqrt, value);
+F32 IREmitter::FPSqrt(const F32& value) {
+    return Inst<F32>(Opcode::FPSqrt, value);
 }
 
-U16U32U64 IREmitter::FPSaturate(const U16U32U64& value) {
+F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
     switch (value.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPSaturate16, value);
+        return Inst<F16>(Opcode::FPSaturate16, value);
     case Type::U32:
-        return Inst<U32>(Opcode::FPSaturate32, value);
+        return Inst<F32>(Opcode::FPSaturate32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPSaturate64, value);
+        return Inst<F64>(Opcode::FPSaturate64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-U16U32U64 IREmitter::FPRoundEven(const U16U32U64& value) {
+F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) {
     switch (value.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPRoundEven16, value);
+        return Inst<F16>(Opcode::FPRoundEven16, value);
     case Type::U32:
-        return Inst<U32>(Opcode::FPRoundEven32, value);
+        return Inst<F32>(Opcode::FPRoundEven32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPRoundEven64, value);
+        return Inst<F64>(Opcode::FPRoundEven64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-U16U32U64 IREmitter::FPFloor(const U16U32U64& value) {
+F16F32F64 IREmitter::FPFloor(const F16F32F64& value) {
     switch (value.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPFloor16, value);
+        return Inst<F16>(Opcode::FPFloor16, value);
     case Type::U32:
-        return Inst<U32>(Opcode::FPFloor32, value);
+        return Inst<F32>(Opcode::FPFloor32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPFloor64, value);
+        return Inst<F64>(Opcode::FPFloor64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-U16U32U64 IREmitter::FPCeil(const U16U32U64& value) {
+F16F32F64 IREmitter::FPCeil(const F16F32F64& value) {
     switch (value.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPCeil16, value);
+        return Inst<F16>(Opcode::FPCeil16, value);
     case Type::U32:
-        return Inst<U32>(Opcode::FPCeil32, value);
+        return Inst<F32>(Opcode::FPCeil32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPCeil64, value);
+        return Inst<F64>(Opcode::FPCeil64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) {
+F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) {
     switch (value.Type()) {
     case Type::U16:
-        return Inst<U16>(Opcode::FPTrunc16, value);
+        return Inst<F16>(Opcode::FPTrunc16, value);
     case Type::U32:
-        return Inst<U32>(Opcode::FPTrunc32, value);
+        return Inst<F32>(Opcode::FPTrunc32, value);
     case Type::U64:
-        return Inst<U64>(Opcode::FPTrunc64, value);
+        return Inst<F64>(Opcode::FPTrunc64, value);
     default:
         ThrowInvalidType(value.Type());
     }
@@ -605,7 +698,7 @@ U1 IREmitter::LogicalNot(const U1& value) {
     return Inst<U1>(Opcode::LogicalNot, value);
 }
 
-U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) {
+U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
     switch (bitsize) {
     case 16:
         switch (value.Type()) {
@@ -645,7 +738,7 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) {
     }
 }
 
-U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) {
+U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
     switch (bitsize) {
     case 16:
         switch (value.Type()) {
@@ -685,7 +778,7 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) {
     }
 }
 
-U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value) {
+U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) {
     if (is_signed) {
         return ConvertFToS(bitsize, value);
     } else {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 84b844898f..bfd9916cca 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -27,9 +27,9 @@ public:
     [[nodiscard]] U16 Imm16(u16 value) const;
     [[nodiscard]] U32 Imm32(u32 value) const;
     [[nodiscard]] U32 Imm32(s32 value) const;
-    [[nodiscard]] U32 Imm32(f32 value) const;
+    [[nodiscard]] F32 Imm32(f32 value) const;
     [[nodiscard]] U64 Imm64(u64 value) const;
-    [[nodiscard]] U64 Imm64(f64 value) const;
+    [[nodiscard]] F64 Imm64(f64 value) const;
 
     void Branch(IR::Block* label);
     void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label);
@@ -55,8 +55,8 @@ public:
     void SetCFlag(const U1& value);
     void SetOFlag(const U1& value);
 
-    [[nodiscard]] U32 GetAttribute(IR::Attribute attribute);
-    void SetAttribute(IR::Attribute attribute, const U32& value);
+    [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
+    void SetAttribute(IR::Attribute attribute, const F32& value);
 
     [[nodiscard]] U32 WorkgroupIdX();
     [[nodiscard]] U32 WorkgroupIdY();
@@ -87,44 +87,47 @@ public:
     [[nodiscard]] U1 GetCarryFromOp(const Value& op);
     [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
 
-    [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2);
-    [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3);
-    [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3,
-                                           const UAny& e4);
-    [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element);
+    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
+    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
+    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
+                                           const Value& e4);
+    [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
 
     [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value);
 
+    template <typename Dest, typename Source>
+    [[nodiscard]] Dest BitCast(const Source& value);
+
     [[nodiscard]] U64 PackUint2x32(const Value& vector);
     [[nodiscard]] Value UnpackUint2x32(const U64& value);
 
     [[nodiscard]] U32 PackFloat2x16(const Value& vector);
     [[nodiscard]] Value UnpackFloat2x16(const U32& value);
 
-    [[nodiscard]] U64 PackDouble2x32(const Value& vector);
-    [[nodiscard]] Value UnpackDouble2x32(const U64& value);
+    [[nodiscard]] F64 PackDouble2x32(const Value& vector);
+    [[nodiscard]] Value UnpackDouble2x32(const F64& value);
 
-    [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
-    [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
-    [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
+    [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
+    [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
+    [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
                                   FpControl control = {});
 
-    [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value);
-    [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value);
-    [[nodiscard]] U16U32U64 FPAbsNeg(const U16U32U64& value, bool abs, bool neg);
-
-    [[nodiscard]] U32 FPCosNotReduced(const U32& value);
-    [[nodiscard]] U32 FPExp2NotReduced(const U32& value);
-    [[nodiscard]] U32 FPLog2(const U32& value);
-    [[nodiscard]] U32U64 FPRecip(const U32U64& value);
-    [[nodiscard]] U32U64 FPRecipSqrt(const U32U64& value);
-    [[nodiscard]] U32 FPSinNotReduced(const U32& value);
-    [[nodiscard]] U32 FPSqrt(const U32& value);
-    [[nodiscard]] U16U32U64 FPSaturate(const U16U32U64& value);
-    [[nodiscard]] U16U32U64 FPRoundEven(const U16U32U64& value);
-    [[nodiscard]] U16U32U64 FPFloor(const U16U32U64& value);
-    [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value);
-    [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
+    [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
+    [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
+    [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
+
+    [[nodiscard]] F32 FPCosNotReduced(const F32& value);
+    [[nodiscard]] F32 FPExp2NotReduced(const F32& value);
+    [[nodiscard]] F32 FPLog2(const F32& value);
+    [[nodiscard]] F32F64 FPRecip(const F32F64& value);
+    [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
+    [[nodiscard]] F32 FPSinNotReduced(const F32& value);
+    [[nodiscard]] F32 FPSqrt(const F32& value);
+    [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
+    [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value);
+    [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value);
+    [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value);
+    [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value);
 
     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
@@ -154,9 +157,9 @@ public:
     [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalNot(const U1& value);
 
-    [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value);
-    [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value);
-    [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value);
+    [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
+    [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
+    [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
 
     [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);
 
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 4596bf39f7..6eb105d929 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -52,15 +52,15 @@ OPCODE(LoadGlobalS8,                                        U32,            U64,
 OPCODE(LoadGlobalU16,                                       U32,            U64,                                                            )
 OPCODE(LoadGlobalS16,                                       U32,            U64,                                                            )
 OPCODE(LoadGlobal32,                                        U32,            U64,                                                            )
-OPCODE(LoadGlobal64,                                        Opaque,         U64,                                                            )
-OPCODE(LoadGlobal128,                                       Opaque,         U64,                                                            )
+OPCODE(LoadGlobal64,                                        U32x2,          U64,                                                            )
+OPCODE(LoadGlobal128,                                       U32x4,          U64,                                                            )
 OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                            )
 OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                            )
 OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                            )
 OPCODE(WriteGlobalS16,                                      Void,           U64,            U32,                                            )
 OPCODE(WriteGlobal32,                                       Void,           U64,            U32,                                            )
-OPCODE(WriteGlobal64,                                       Void,           U64,            Opaque,                                         )
-OPCODE(WriteGlobal128,                                      Void,           U64,            Opaque,                                         )
+OPCODE(WriteGlobal64,                                       Void,           U64,            U32x2,                                          )
+OPCODE(WriteGlobal128,                                      Void,           U64,            U32x4,                                          )
 
 // Storage buffer operations
 OPCODE(LoadStorageU8,                                       U32,            U32,            U32,                                            )
@@ -68,21 +68,41 @@ OPCODE(LoadStorageS8,                                       U32,            U32,
 OPCODE(LoadStorageU16,                                      U32,            U32,            U32,                                            )
 OPCODE(LoadStorageS16,                                      U32,            U32,            U32,                                            )
 OPCODE(LoadStorage32,                                       U32,            U32,            U32,                                            )
-OPCODE(LoadStorage64,                                       Opaque,         U32,            U32,                                            )
-OPCODE(LoadStorage128,                                      Opaque,         U32,            U32,                                            )
-OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                                            )
-OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                                            )
-OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                                            )
-OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                                            )
-OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                                            )
-OPCODE(WriteStorage64,                                      Void,           U32,            U32,            Opaque,                                         )
-OPCODE(WriteStorage128,                                     Void,           U32,            U32,            Opaque,                                         )
+OPCODE(LoadStorage64,                                       U32x2,          U32,            U32,                                            )
+OPCODE(LoadStorage128,                                      U32x4,          U32,            U32,                                            )
+OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                            )
+OPCODE(WriteStorage64,                                      Void,           U32,            U32,            U32x2,                          )
+OPCODE(WriteStorage128,                                     Void,           U32,            U32,            U32x4,                          )
 
 // Vector utility
-OPCODE(CompositeConstruct2,                                 Opaque,         Opaque,         Opaque,                                         )
-OPCODE(CompositeConstruct3,                                 Opaque,         Opaque,         Opaque,         Opaque,                         )
-OPCODE(CompositeConstruct4,                                 Opaque,         Opaque,         Opaque,         Opaque,         Opaque,         )
-OPCODE(CompositeExtract,                                    Opaque,         Opaque,         U32,                                            )
+OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                            )
+OPCODE(CompositeConstructU32x3,                             U32x3,          U32,            U32,            U32,                            )
+OPCODE(CompositeConstructU32x4,                             U32x4,          U32,            U32,            U32,            U32,            )
+OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                            )
+OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                            )
+OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                            )
+OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                            )
+OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                            )
+OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,            )
+OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                            )
+OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                            )
+OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                            )
+OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                            )
+OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                            )
+OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,            )
+OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                            )
+OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                            )
+OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                            )
+OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                            )
+OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                            )
+OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,            )
+OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                            )
+OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                            )
+OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                            )
 
 // Select operations
 OPCODE(Select8,                                             U8,             U1,             U8,             U8,                             )
@@ -91,12 +111,18 @@ OPCODE(Select32,                                            U32,            U1,
 OPCODE(Select64,                                            U64,            U1,             U64,            U64,                            )
 
 // Bitwise conversions
-OPCODE(PackUint2x32,                                        U64,            Opaque,                                                         )
-OPCODE(UnpackUint2x32,                                      Opaque,         U64,                                                            )
-OPCODE(PackFloat2x16,                                       U32,            Opaque,                                                         )
-OPCODE(UnpackFloat2x16,                                     Opaque,         U32,                                                            )
-OPCODE(PackDouble2x32,                                      U64,            Opaque,                                                         )
-OPCODE(UnpackDouble2x32,                                    Opaque,         U64,                                                            )
+OPCODE(BitCastU16F16,                                       U16,            F16,                                                            )
+OPCODE(BitCastU32F32,                                       U32,            F32,                                                            )
+OPCODE(BitCastU64F64,                                       U64,            F64,                                                            )
+OPCODE(BitCastF16U16,                                       F16,            U16,                                                            )
+OPCODE(BitCastF32U32,                                       F32,            U32,                                                            )
+OPCODE(BitCastF64U64,                                       F64,            U64,                                                            )
+OPCODE(PackUint2x32,                                        U64,            U32x2,                                                          )
+OPCODE(UnpackUint2x32,                                      U32x2,          U64,                                                            )
+OPCODE(PackFloat2x16,                                       U32,            F16x2,                                                          )
+OPCODE(UnpackFloat2x16,                                     F16x2,          U32,                                                            )
+OPCODE(PackDouble2x32,                                      U64,            U32x2,                                                          )
+OPCODE(UnpackDouble2x32,                                    U32x2,          U64,                                                            )
 
 // Pseudo-operation, handled specially at final emit
 OPCODE(GetZeroFromOp,                                       U1,             Opaque,                                                         )
@@ -105,52 +131,52 @@ OPCODE(GetCarryFromOp,                                      U1,             Opaq
 OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                         )
 
 // Floating-point operations
-OPCODE(FPAbs16,                                             U16,            U16,                                                            )
-OPCODE(FPAbs32,                                             U32,            U32,                                                            )
-OPCODE(FPAbs64,                                             U64,            U64,                                                            )
-OPCODE(FPAdd16,                                             U16,            U16,            U16,                                            )
-OPCODE(FPAdd32,                                             U32,            U32,            U32,                                            )
-OPCODE(FPAdd64,                                             U64,            U64,            U64,                                            )
-OPCODE(FPFma16,                                             U16,            U16,            U16,            U16,                            )
-OPCODE(FPFma32,                                             U32,            U32,            U32,            U32,                            )
-OPCODE(FPFma64,                                             U64,            U64,            U64,            U64,                            )
-OPCODE(FPMax32,                                             U32,            U32,            U32,                                            )
-OPCODE(FPMax64,                                             U64,            U64,            U64,                                            )
-OPCODE(FPMin32,                                             U32,            U32,            U32,                                            )
-OPCODE(FPMin64,                                             U64,            U64,            U64,                                            )
-OPCODE(FPMul16,                                             U16,            U16,            U16,                                            )
-OPCODE(FPMul32,                                             U32,            U32,            U32,                                            )
-OPCODE(FPMul64,                                             U64,            U64,            U64,                                            )
-OPCODE(FPNeg16,                                             U16,            U16,                                                            )
-OPCODE(FPNeg32,                                             U32,            U32,                                                            )
-OPCODE(FPNeg64,                                             U64,            U64,                                                            )
-OPCODE(FPRecip32,                                           U32,            U32,                                                            )
-OPCODE(FPRecip64,                                           U64,            U64,                                                            )
-OPCODE(FPRecipSqrt32,                                       U32,            U32,                                                            )
-OPCODE(FPRecipSqrt64,                                       U64,            U64,                                                            )
-OPCODE(FPSqrt,                                              U32,            U32,                                                            )
-OPCODE(FPSin,                                               U32,            U32,                                                            )
-OPCODE(FPSinNotReduced,                                     U32,            U32,                                                            )
-OPCODE(FPExp2,                                              U32,            U32,                                                            )
-OPCODE(FPExp2NotReduced,                                    U32,            U32,                                                            )
-OPCODE(FPCos,                                               U32,            U32,                                                            )
-OPCODE(FPCosNotReduced,                                     U32,            U32,                                                            )
-OPCODE(FPLog2,                                              U32,            U32,                                                            )
-OPCODE(FPSaturate16,                                        U16,            U16,                                                            )
-OPCODE(FPSaturate32,                                        U32,            U32,                                                            )
-OPCODE(FPSaturate64,                                        U64,            U64,                                                            )
-OPCODE(FPRoundEven16,                                       U16,            U16,                                                            )
-OPCODE(FPRoundEven32,                                       U32,            U32,                                                            )
-OPCODE(FPRoundEven64,                                       U64,            U64,                                                            )
-OPCODE(FPFloor16,                                           U16,            U16,                                                            )
-OPCODE(FPFloor32,                                           U32,            U32,                                                            )
-OPCODE(FPFloor64,                                           U64,            U64,                                                            )
-OPCODE(FPCeil16,                                            U16,            U16,                                                            )
-OPCODE(FPCeil32,                                            U32,            U32,                                                            )
-OPCODE(FPCeil64,                                            U64,            U64,                                                            )
-OPCODE(FPTrunc16,                                           U16,            U16,                                                            )
-OPCODE(FPTrunc32,                                           U32,            U32,                                                            )
-OPCODE(FPTrunc64,                                           U64,            U64,                                                            )
+OPCODE(FPAbs16,                                             F16,            F16,                                                            )
+OPCODE(FPAbs32,                                             F32,            F32,                                                            )
+OPCODE(FPAbs64,                                             F64,            F64,                                                            )
+OPCODE(FPAdd16,                                             F16,            F16,            F16,                                            )
+OPCODE(FPAdd32,                                             F32,            F32,            F32,                                            )
+OPCODE(FPAdd64,                                             F64,            F64,            F64,                                            )
+OPCODE(FPFma16,                                             F16,            F16,            F16,            F16,                            )
+OPCODE(FPFma32,                                             F32,            F32,            F32,            F32,                            )
+OPCODE(FPFma64,                                             F64,            F64,            F64,            F64,                            )
+OPCODE(FPMax32,                                             F32,            F32,            F32,                                            )
+OPCODE(FPMax64,                                             F64,            F64,            F64,                                            )
+OPCODE(FPMin32,                                             F32,            F32,            F32,                                            )
+OPCODE(FPMin64,                                             F64,            F64,            F64,                                            )
+OPCODE(FPMul16,                                             F16,            F16,            F16,                                            )
+OPCODE(FPMul32,                                             F32,            F32,            F32,                                            )
+OPCODE(FPMul64,                                             F64,            F64,            F64,                                            )
+OPCODE(FPNeg16,                                             F16,            F16,                                                            )
+OPCODE(FPNeg32,                                             F32,            F32,                                                            )
+OPCODE(FPNeg64,                                             F64,            F64,                                                            )
+OPCODE(FPRecip32,                                           F32,            F32,                                                            )
+OPCODE(FPRecip64,                                           F64,            F64,                                                            )
+OPCODE(FPRecipSqrt32,                                       F32,            F32,                                                            )
+OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                            )
+OPCODE(FPSqrt,                                              F32,            F32,                                                            )
+OPCODE(FPSin,                                               F32,            F32,                                                            )
+OPCODE(FPSinNotReduced,                                     F32,            F32,                                                            )
+OPCODE(FPExp2,                                              F32,            F32,                                                            )
+OPCODE(FPExp2NotReduced,                                    F32,            F32,                                                            )
+OPCODE(FPCos,                                               F32,            F32,                                                            )
+OPCODE(FPCosNotReduced,                                     F32,            F32,                                                            )
+OPCODE(FPLog2,                                              F32,            F32,                                                            )
+OPCODE(FPSaturate16,                                        F16,            F16,                                                            )
+OPCODE(FPSaturate32,                                        F32,            F32,                                                            )
+OPCODE(FPSaturate64,                                        F64,            F64,                                                            )
+OPCODE(FPRoundEven16,                                       F16,            F16,                                                            )
+OPCODE(FPRoundEven32,                                       F32,            F32,                                                            )
+OPCODE(FPRoundEven64,                                       F64,            F64,                                                            )
+OPCODE(FPFloor16,                                           F16,            F16,                                                            )
+OPCODE(FPFloor32,                                           F32,            F32,                                                            )
+OPCODE(FPFloor64,                                           F64,            F64,                                                            )
+OPCODE(FPCeil16,                                            F16,            F16,                                                            )
+OPCODE(FPCeil32,                                            F32,            F32,                                                            )
+OPCODE(FPCeil64,                                            F64,            F64,                                                            )
+OPCODE(FPTrunc16,                                           F16,            F16,                                                            )
+OPCODE(FPTrunc32,                                           F32,            F32,                                                            )
+OPCODE(FPTrunc64,                                           F64,            F64,                                                            )
 
 // Integer operations
 OPCODE(IAdd32,                                              U32,            U32,            U32,                                            )
@@ -188,24 +214,24 @@ OPCODE(LogicalXor,                                          U1,             U1,
 OPCODE(LogicalNot,                                          U1,             U1,                                                             )
 
 // Conversion operations
-OPCODE(ConvertS16F16,                                       U32,            U16,                                                            )
-OPCODE(ConvertS16F32,                                       U32,            U32,                                                            )
-OPCODE(ConvertS16F64,                                       U32,            U64,                                                            )
-OPCODE(ConvertS32F16,                                       U32,            U16,                                                            )
-OPCODE(ConvertS32F32,                                       U32,            U32,                                                            )
-OPCODE(ConvertS32F64,                                       U32,            U64,                                                            )
-OPCODE(ConvertS64F16,                                       U64,            U16,                                                            )
-OPCODE(ConvertS64F32,                                       U64,            U32,                                                            )
-OPCODE(ConvertS64F64,                                       U64,            U64,                                                            )
-OPCODE(ConvertU16F16,                                       U32,            U16,                                                            )
-OPCODE(ConvertU16F32,                                       U32,            U32,                                                            )
-OPCODE(ConvertU16F64,                                       U32,            U64,                                                            )
-OPCODE(ConvertU32F16,                                       U32,            U16,                                                            )
-OPCODE(ConvertU32F32,                                       U32,            U32,                                                            )
-OPCODE(ConvertU32F64,                                       U32,            U64,                                                            )
-OPCODE(ConvertU64F16,                                       U64,            U16,                                                            )
-OPCODE(ConvertU64F32,                                       U64,            U32,                                                            )
-OPCODE(ConvertU64F64,                                       U64,            U64,                                                            )
+OPCODE(ConvertS16F16,                                       U32,            F16,                                                            )
+OPCODE(ConvertS16F32,                                       U32,            F32,                                                            )
+OPCODE(ConvertS16F64,                                       U32,            F64,                                                            )
+OPCODE(ConvertS32F16,                                       U32,            F16,                                                            )
+OPCODE(ConvertS32F32,                                       U32,            F32,                                                            )
+OPCODE(ConvertS32F64,                                       U32,            F64,                                                            )
+OPCODE(ConvertS64F16,                                       U64,            F16,                                                            )
+OPCODE(ConvertS64F32,                                       U64,            F32,                                                            )
+OPCODE(ConvertS64F64,                                       U64,            F64,                                                            )
+OPCODE(ConvertU16F16,                                       U32,            F16,                                                            )
+OPCODE(ConvertU16F32,                                       U32,            F32,                                                            )
+OPCODE(ConvertU16F64,                                       U32,            F64,                                                            )
+OPCODE(ConvertU32F16,                                       U32,            F16,                                                            )
+OPCODE(ConvertU32F32,                                       U32,            F32,                                                            )
+OPCODE(ConvertU32F64,                                       U32,            F64,                                                            )
+OPCODE(ConvertU64F16,                                       U64,            F16,                                                            )
+OPCODE(ConvertU64F32,                                       U64,            F32,                                                            )
+OPCODE(ConvertU64F64,                                       U64,            F64,                                                            )
 
 OPCODE(ConvertU64U32,                                       U64,            U32,                                                            )
 OPCODE(ConvertU32U64,                                       U32,            U64,                                                            )
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
index 13cc091956..f28341bfe7 100644
--- a/src/shader_recompiler/frontend/ir/type.cpp
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -11,7 +11,9 @@ namespace Shader::IR {
 
 std::string NameOf(Type type) {
     static constexpr std::array names{
-        "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64",
+        "Opaque", "Label", "Reg",   "Pred",  "Attribute", "U1",    "U8",    "U16",   "U32",
+        "U64",    "F16",   "F32",   "F64",   "U32x2",     "U32x3", "U32x4", "F16x2", "F16x3",
+        "F16x4",  "F32x2", "F32x3", "F32x4", "F64x2",     "F64x3", "F64x4",
     };
     const size_t bits{static_cast<size_t>(type)};
     if (bits == 0) {
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
index 397875018b..9a32ca1e8a 100644
--- a/src/shader_recompiler/frontend/ir/type.h
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -25,6 +25,21 @@ enum class Type {
     U16 = 1 << 7,
     U32 = 1 << 8,
     U64 = 1 << 9,
+    F16 = 1 << 10,
+    F32 = 1 << 11,
+    F64 = 1 << 12,
+    U32x2 = 1 << 13,
+    U32x3 = 1 << 14,
+    U32x4 = 1 << 15,
+    F16x2 = 1 << 16,
+    F16x3 = 1 << 17,
+    F16x4 = 1 << 18,
+    F32x2 = 1 << 19,
+    F32x3 = 1 << 20,
+    F32x4 = 1 << 21,
+    F64x2 = 1 << 22,
+    F64x3 = 1 << 23,
+    F64x4 = 1 << 24,
 };
 DECLARE_ENUM_FLAG_OPERATORS(Type)
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 59a9b10dc9..93ff8ccf16 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -26,8 +26,12 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
 
 Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
 
+Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
+
 Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
 
+Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
+
 bool Value::IsIdentity() const noexcept {
     return type == Type::Opaque && inst->Opcode() == Opcode::Identity;
 }
@@ -122,6 +126,14 @@ u32 Value::U32() const {
     return imm_u32;
 }
 
+f32 Value::F32() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).F32();
+    }
+    ValidateAccess(Type::F32);
+    return imm_f32;
+}
+
 u64 Value::U64() const {
     if (IsIdentity()) {
         return inst->Arg(0).U64();
@@ -152,11 +164,27 @@ bool Value::operator==(const Value& other) const {
     case Type::U8:
         return imm_u8 == other.imm_u8;
     case Type::U16:
+    case Type::F16:
         return imm_u16 == other.imm_u16;
     case Type::U32:
+    case Type::F32:
         return imm_u32 == other.imm_u32;
     case Type::U64:
+    case Type::F64:
         return imm_u64 == other.imm_u64;
+    case Type::U32x2:
+    case Type::U32x3:
+    case Type::U32x4:
+    case Type::F16x2:
+    case Type::F16x3:
+    case Type::F16x4:
+    case Type::F32x2:
+    case Type::F32x3:
+    case Type::F32x4:
+    case Type::F64x2:
+    case Type::F64x3:
+    case Type::F64x4:
+        break;
     }
     throw LogicError("Invalid type {}", type);
 }
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 31f8317940..2f3688c736 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -28,7 +28,9 @@ public:
     explicit Value(u8 value) noexcept;
     explicit Value(u16 value) noexcept;
     explicit Value(u32 value) noexcept;
+    explicit Value(f32 value) noexcept;
     explicit Value(u64 value) noexcept;
+    explicit Value(f64 value) noexcept;
 
     [[nodiscard]] bool IsIdentity() const noexcept;
     [[nodiscard]] bool IsEmpty() const noexcept;
@@ -46,6 +48,7 @@ public:
     [[nodiscard]] u8 U8() const;
     [[nodiscard]] u16 U16() const;
     [[nodiscard]] u32 U32() const;
+    [[nodiscard]] f32 F32() const;
     [[nodiscard]] u64 U64() const;
 
     [[nodiscard]] bool operator==(const Value& other) const;
@@ -65,7 +68,9 @@ private:
         u8 imm_u8;
         u16 imm_u16;
         u32 imm_u32;
+        f32 imm_f32;
         u64 imm_u64;
+        f64 imm_f64;
     };
 };
 
@@ -93,8 +98,13 @@ using U8 = TypedValue<Type::U8>;
 using U16 = TypedValue<Type::U16>;
 using U32 = TypedValue<Type::U32>;
 using U64 = TypedValue<Type::U64>;
+using F16 = TypedValue<Type::F16>;
+using F32 = TypedValue<Type::F32>;
+using F64 = TypedValue<Type::F64>;
 using U32U64 = TypedValue<Type::U32 | Type::U64>;
+using F32F64 = TypedValue<Type::F32 | Type::F64>;
 using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
+using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
 using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
index d2c44b9ccd..cb3a326cfa 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -11,7 +11,7 @@ namespace Shader::Maxwell {
 namespace {
 
 void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
-          const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
+          const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
     union {
         u64 raw;
         BitField<0, 8, IR::Reg> dest_reg;
@@ -24,17 +24,17 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin
     if (cc) {
         throw NotImplementedException("FADD CC");
     }
-    const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)};
-    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
+    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
+    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
     IR::FpControl control{
         .no_contraction{true},
         .rounding{CastFpRounding(fp_rounding)},
         .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
     };
-    v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
+    v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
 }
 
-void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
+void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
     union {
         u64 raw;
         BitField<39, 2, FpRounding> fp_rounding;
@@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
 } // Anonymous namespace
 
 void TranslatorVisitor::FADD_reg(u64 insn) {
-    FADD(*this, insn, GetReg20(insn));
+    FADD(*this, insn, GetReg20F(insn));
 }
 
 void TranslatorVisitor::FADD_cbuf(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
index c4288d9a83..acd8445ad1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -55,21 +55,21 @@ size_t BitSize(DestFormat dest_format) {
     }
 }
 
-void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::U16U32U64& op_a) {
+void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
     // F2I is used to convert from a floating point value to an integer
     const F2I f2i{insn};
 
-    const IR::U16U32U64 float_value{v.ir.FPAbsNeg(op_a, f2i.abs != 0, f2i.neg != 0)};
-    const IR::U16U32U64 rounded_value{[&] {
+    const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
+    const IR::F16F32F64 rounded_value{[&] {
         switch (f2i.rounding) {
         case Rounding::Round:
-            return v.ir.FPRoundEven(float_value);
+            return v.ir.FPRoundEven(op_a);
         case Rounding::Floor:
-            return v.ir.FPFloor(float_value);
+            return v.ir.FPFloor(op_a);
         case Rounding::Ceil:
-            return v.ir.FPCeil(float_value);
+            return v.ir.FPCeil(op_a);
         case Rounding::Trunc:
-            return v.ir.FPTrunc(float_value);
+            return v.ir.FPTrunc(op_a);
         default:
             throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
         }
@@ -105,12 +105,12 @@ void TranslatorVisitor::F2I_reg(u64 insn) {
         BitField<20, 8, IR::Reg> src_reg;
     } const f2i{insn};
 
-    const IR::U16U32U64 op_a{[&]() -> IR::U16U32U64 {
+    const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
         switch (f2i.base.src_format) {
         case SrcFormat::F16:
-            return ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half);
+            return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
         case SrcFormat::F32:
-            return X(f2i.src_reg);
+            return F(f2i.src_reg);
         case SrcFormat::F64:
             return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
         default:
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
index 30ca052ec5..1464f2807a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -9,7 +9,7 @@
 
 namespace Shader::Maxwell {
 namespace {
-void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a,
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
           bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
     union {
         u64 raw;
@@ -23,18 +23,18 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
     if (cc) {
         throw NotImplementedException("FFMA CC");
     }
-    const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)};
-    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
-    const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
+    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
+    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+    const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
     const IR::FpControl fp_control{
         .no_contraction{true},
         .rounding{CastFpRounding(fp_rounding)},
         .fmz_mode{CastFmzMode(fmz_mode)},
     };
-    v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control));
+    v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control));
 }
 
-void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) {
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
     union {
         u64 raw;
         BitField<47, 1, u64> cc;
@@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
 } // Anonymous namespace
 
 void TranslatorVisitor::FFMA_reg(u64 insn) {
-    FFMA(*this, insn, GetReg20(insn), GetReg39(insn));
+    FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn));
 }
 
 void TranslatorVisitor::FFMA_rc(u64) {
@@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) {
 }
 
 void TranslatorVisitor::FFMA_cr(u64 insn) {
-    FFMA(*this, insn, GetCbuf(insn), GetReg39(insn));
+    FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn));
 }
 
 void TranslatorVisitor::FFMA_imm(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
index e2ab0dab22..90cddb18b4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -35,8 +35,8 @@ void TranslatorVisitor::MUFU(u64 insn) {
         BitField<50, 1, u64> sat;
     } const mufu{insn};
 
-    const IR::U32 op_a{ir.FPAbsNeg(X(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
-    IR::U32 value{[&]() -> IR::U32 {
+    const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
+    IR::F32 value{[&]() -> IR::F32 {
         switch (mufu.operation) {
         case Operation::Cos:
             return ir.FPCosNotReduced(op_a);
@@ -65,7 +65,7 @@ void TranslatorVisitor::MUFU(u64 insn) {
         value = ir.FPSaturate(value);
     }
 
-    X(mufu.dest_reg, value);
+    F(mufu.dest_reg, value);
 }
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
index 743a1e2f0f..1b1d38be7a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -4,6 +4,7 @@
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
 #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
@@ -43,7 +44,7 @@ float ScaleFactor(Scale scale) {
     throw NotImplementedException("Invalid FMUL scale {}", scale);
 }
 
-void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode,
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
           FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
     union {
         u64 raw;
@@ -57,23 +58,23 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode
     if (sat) {
         throw NotImplementedException("FMUL SAT");
     }
-    IR::U32 op_a{v.X(fmul.src_a)};
+    IR::F32 op_a{v.F(fmul.src_a)};
     if (scale != Scale::None) {
         if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
             throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
         }
         op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
     }
-    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
     const IR::FpControl fp_control{
         .no_contraction{true},
         .rounding{CastFpRounding(fp_rounding)},
         .fmz_mode{CastFmzMode(fmz_mode)},
     };
-    v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control));
+    v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control));
 }
 
-void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
     union {
         u64 raw;
         BitField<39, 2, FpRounding> fp_rounding;
@@ -90,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
 } // Anonymous namespace
 
 void TranslatorVisitor::FMUL_reg(u64 insn) {
-    return FMUL(*this, insn, GetReg20(insn));
+    return FMUL(*this, insn, GetReg20F(insn));
 }
 
 void TranslatorVisitor::FMUL_cbuf(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 548c7f611d..3c9eaddd94 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -12,10 +12,18 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) {
     return ir.GetReg(reg);
 }
 
+IR::F32 TranslatorVisitor::F(IR::Reg reg) {
+    return ir.BitCast<IR::F32>(X(reg));
+}
+
 void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
     ir.SetReg(dest_reg, value);
 }
 
+void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
+    X(dest_reg, ir.BitCast<IR::U32>(value));
+}
+
 IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
     union {
         u64 raw;
@@ -32,6 +40,14 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
     return X(reg.index);
 }
 
+IR::F32 TranslatorVisitor::GetReg20F(u64 insn) {
+    return ir.BitCast<IR::F32>(GetReg20(insn));
+}
+
+IR::F32 TranslatorVisitor::GetReg39F(u64 insn) {
+    return ir.BitCast<IR::F32>(GetReg39(insn));
+}
+
 IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
     union {
         u64 raw;
@@ -49,6 +65,10 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
     return ir.GetCbuf(binding, byte_offset);
 }
 
+IR::F32 TranslatorVisitor::GetCbufF(u64 insn) {
+    return ir.BitCast<IR::F32>(GetCbuf(insn));
+}
+
 IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
     union {
         u64 raw;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index ef6d977fef..b701605d73 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -296,12 +296,18 @@ public:
     void XMAD_imm(u64 insn);
 
     [[nodiscard]] IR::U32 X(IR::Reg reg);
+    [[nodiscard]] IR::F32 F(IR::Reg reg);
+
     void X(IR::Reg dest_reg, const IR::U32& value);
+    void F(IR::Reg dest_reg, const IR::F32& value);
 
     [[nodiscard]] IR::U32 GetReg20(u64 insn);
     [[nodiscard]] IR::U32 GetReg39(u64 insn);
+    [[nodiscard]] IR::F32 GetReg20F(u64 insn);
+    [[nodiscard]] IR::F32 GetReg39F(u64 insn);
 
     [[nodiscard]] IR::U32 GetCbuf(u64 insn);
+    [[nodiscard]] IR::F32 GetCbufF(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm20(u64 insn);
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
index 23512db1a4..de65173e8d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -5,22 +5,23 @@
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/maxwell/opcode.h"
 #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
 
 namespace Shader::Maxwell {
 namespace {
 enum class InterpolationMode : u64 {
-    Pass = 0,
-    Multiply = 1,
-    Constant = 2,
-    Sc = 3,
+    Pass,
+    Multiply,
+    Constant,
+    Sc,
 };
 
 enum class SampleMode : u64 {
-    Default = 0,
-    Centroid = 1,
-    Offset = 2,
+    Default,
+    Centroid,
+    Offset,
 };
 } // Anonymous namespace
 
@@ -54,12 +55,12 @@ void TranslatorVisitor::IPA(u64 insn) {
     }
 
     const IR::Attribute attribute{ipa.attribute};
-    IR::U32 value{ir.GetAttribute(attribute)};
+    IR::F32 value{ir.GetAttribute(attribute)};
     if (IR::IsGeneric(attribute)) {
         // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))};
         const bool is_perspective{false};
         if (is_perspective) {
-            const IR::U32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))};
+            const IR::F32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))};
             value = ir.FPMul(value, rcp_position_w);
         }
     }
@@ -68,7 +69,7 @@ void TranslatorVisitor::IPA(u64 insn) {
     case InterpolationMode::Pass:
         break;
     case InterpolationMode::Multiply:
-        value = ir.FPMul(value, ir.GetReg(ipa.multiplier));
+        value = ir.FPMul(value, F(ipa.multiplier));
         break;
     case InterpolationMode::Constant:
         throw NotImplementedException("IPA.CONSTANT");
@@ -86,7 +87,7 @@ void TranslatorVisitor::IPA(u64 insn) {
         value = ir.FPSaturate(value);
     }
 
-    ir.SetReg(ipa.dest_reg, value);
+    F(ipa.dest_reg, value);
 }
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index c9669c6178..9f1570479d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) {
         }
         const IR::Value vector{ir.LoadGlobal64(address)};
         for (int i = 0; i < 2; ++i) {
-            X(dest_reg + i, ir.CompositeExtract(vector, i));
+            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
         }
         break;
     }
@@ -124,7 +124,7 @@ void TranslatorVisitor::LDG(u64 insn) {
         }
         const IR::Value vector{ir.LoadGlobal128(address)};
         for (int i = 0; i < 4; ++i) {
-            X(dest_reg + i, ir.CompositeExtract(vector, i));
+            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
         }
         break;
     }
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 02f5b653d4..7fb3192d8e 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <type_traits>
 
+#include "common/bit_cast.h"
 #include "common/bit_util.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
@@ -25,6 +26,8 @@ template <typename T>
         return value.U1();
     } else if constexpr (std::is_same_v<T, u32>) {
         return value.U32();
+    } else if constexpr (std::is_same_v<T, f32>) {
+        return value.F32();
     } else if constexpr (std::is_same_v<T, u64>) {
         return value.U64();
     }
@@ -115,6 +118,19 @@ void FoldLogicalAnd(IR::Inst& inst) {
     }
 }
 
+template <typename Dest, typename Source>
+void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
+    const IR::Value value{inst.Arg(0)};
+    if (value.IsImmediate()) {
+        inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))});
+        return;
+    }
+    IR::Inst* const arg_inst{value.InstRecursive()};
+    if (value.InstRecursive()->Opcode() == reverse) {
+        inst.ReplaceUsesWith(arg_inst->Arg(0));
+    }
+}
+
 void ConstantPropagation(IR::Inst& inst) {
     switch (inst.Opcode()) {
     case IR::Opcode::GetRegister:
@@ -123,6 +139,10 @@ void ConstantPropagation(IR::Inst& inst) {
         return FoldGetPred(inst);
     case IR::Opcode::IAdd32:
         return FoldAdd<u32>(inst);
+    case IR::Opcode::BitCastF32U32:
+        return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32);
+    case IR::Opcode::BitCastU32F32:
+        return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32);
     case IR::Opcode::IAdd64:
         return FoldAdd<u64>(inst);
     case IR::Opcode::BitFieldUExtract:
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index ee69a5c9d9..34393e1d57 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -108,8 +108,8 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
            storage_buffer.offset < bias.offset_end;
 }
 
-/// Ignores a global memory operation, reads return zero and writes are ignored
-void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
+/// Discards a global memory operation, reads return zero and writes are ignored
+void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
     const IR::Value zero{u32{0}};
     switch (inst->Opcode()) {
     case IR::Opcode::LoadGlobalS8:
@@ -120,12 +120,12 @@ void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
         inst->ReplaceUsesWith(zero);
         break;
     case IR::Opcode::LoadGlobal64:
-        inst->ReplaceUsesWith(
-            IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})});
+        inst->ReplaceUsesWith(IR::Value{
+            &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})});
         break;
     case IR::Opcode::LoadGlobal128:
         inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
-            inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})});
+            inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})});
         break;
     case IR::Opcode::WriteGlobalS8:
     case IR::Opcode::WriteGlobalU8:
@@ -137,7 +137,8 @@ void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
         inst->Invalidate();
         break;
     default:
-        throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode());
+        throw LogicError("Invalid opcode to discard its global memory operation {}",
+                         inst->Opcode());
     }
 }
 
@@ -196,7 +197,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
         storage_buffer = Track(addr, nullptr);
         if (!storage_buffer) {
             // If that also failed, drop the global memory usage
-            IgnoreGlobalMemory(block, inst);
+            DiscardGlobalMemory(block, inst);
         }
     }
     // Collect storage buffer and the instruction
@@ -242,12 +243,12 @@ std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) {
     if (vector.IsImmediate()) {
         return std::nullopt;
     }
-    // This vector is expected to be a CompositeConstruct2
+    // This vector is expected to be a CompositeConstructU32x2
     IR::Inst* const vector_inst{vector.InstRecursive()};
-    if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) {
+    if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) {
         return std::nullopt;
     }
-    // Grab the first argument from the CompositeConstruct2, this is the low address.
+    // Grab the first argument from the CompositeConstructU32x2, this is the low address.
     // Re-apply the offset in case we found one.
     const IR::U32 low_addr{vector_inst->Arg(0)};
     return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr;
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index 4022c6fe2a..e6596d8287 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -52,7 +52,7 @@ int main() {
     // RunDatabase();
 
     // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
-    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
+    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"};
     auto cfg{std::make_unique<Flow::CFG>(env, 0)};
     // fmt::print(stdout, "{}\n", cfg->Dot());
 
-- 
cgit v1.2.3-70-g09d2


From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 17 Feb 2021 00:59:28 -0300
Subject: shader: Primitive Vulkan integration

---
 src/shader_recompiler/CMakeLists.txt               |   13 +-
 .../backend/spirv/emit_context.cpp                 |    2 +
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  117 +-
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  419 ++--
 .../spirv/emit_spirv_bitwise_conversion.cpp        |   24 +-
 .../backend/spirv/emit_spirv_composite.cpp         |   48 +-
 .../backend/spirv/emit_spirv_context_get_set.cpp   |   42 +-
 .../backend/spirv/emit_spirv_control_flow.cpp      |   10 +-
 .../backend/spirv/emit_spirv_floating_point.cpp    |   92 +-
 .../backend/spirv/emit_spirv_integer.cpp           |   60 +-
 .../backend/spirv/emit_spirv_logical.cpp           |   40 +-
 .../backend/spirv/emit_spirv_memory.cpp            |   56 +-
 .../backend/spirv/emit_spirv_select.cpp            |    8 +-
 .../backend/spirv/emit_spirv_undefined.cpp         |   10 +-
 src/shader_recompiler/environment.h                |    6 +-
 src/shader_recompiler/file_environment.cpp         |    6 +-
 src/shader_recompiler/file_environment.h           |    4 +-
 src/shader_recompiler/frontend/ir/basic_block.cpp  |    2 +
 src/shader_recompiler/frontend/ir/post_order.cpp   |    2 +-
 src/shader_recompiler/frontend/maxwell/program.cpp |    2 +-
 .../frontend/maxwell/translate/impl/impl.cpp       |    8 +
 .../frontend/maxwell/translate/impl/impl.h         |    1 +
 .../maxwell/translate/impl/move_register.cpp       |   35 +-
 .../maxwell/translate/impl/not_implemented.cpp     |    4 -
 src/shader_recompiler/main.cpp                     |    2 +-
 src/shader_recompiler/profile.h                    |   13 +
 src/shader_recompiler/recompiler.cpp               |   27 +
 src/shader_recompiler/recompiler.h                 |   18 +
 src/video_core/CMakeLists.txt                      |    6 +-
 src/video_core/engines/kepler_compute.h            |    1 -
 src/video_core/engines/shader_bytecode.h           | 2298 --------------------
 src/video_core/engines/shader_header.h             |  158 --
 .../renderer_vulkan/vk_compute_pipeline.cpp        |  140 +-
 .../renderer_vulkan/vk_compute_pipeline.h          |   43 +-
 .../renderer_vulkan/vk_descriptor_pool.cpp         |    6 +-
 .../renderer_vulkan/vk_descriptor_pool.h           |   10 +-
 src/video_core/renderer_vulkan/vk_pipeline.h       |   36 +
 .../renderer_vulkan/vk_pipeline_cache.cpp          |  190 +-
 src/video_core/renderer_vulkan/vk_pipeline_cache.h |   30 +-
 src/video_core/renderer_vulkan/vk_rasterizer.cpp   |   23 +-
 src/video_core/renderer_vulkan/vk_rasterizer.h     |    3 -
 .../renderer_vulkan/vk_resource_pool.cpp           |   12 +-
 src/video_core/renderer_vulkan/vk_resource_pool.h  |   12 +-
 43 files changed, 1003 insertions(+), 3036 deletions(-)
 create mode 100644 src/shader_recompiler/profile.h
 create mode 100644 src/shader_recompiler/recompiler.cpp
 create mode 100644 src/shader_recompiler/recompiler.h
 delete mode 100644 src/video_core/engines/shader_bytecode.h
 delete mode 100644 src/video_core/engines/shader_header.h
 create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 84be94a8d5..b56bdd3d9c 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_executable(shader_recompiler
+add_library(shader_recompiler STATIC
     backend/spirv/emit_context.cpp
     backend/spirv/emit_context.h
     backend/spirv/emit_spirv.cpp
@@ -85,13 +85,19 @@ add_executable(shader_recompiler
     ir_opt/passes.h
     ir_opt/ssa_rewrite_pass.cpp
     ir_opt/verification_pass.cpp
-    main.cpp
     object_pool.h
+    profile.h
+    recompiler.cpp
+    recompiler.h
     shader_info.h
 )
 
-target_include_directories(video_core PRIVATE sirit)
+target_include_directories(shader_recompiler PRIVATE sirit)
 target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit)
+target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit)
+
+add_executable(shader_util main.cpp)
+target_link_libraries(shader_util PRIVATE shader_recompiler)
 
 if (MSVC)
     target_compile_options(shader_recompiler PRIVATE
@@ -121,3 +127,4 @@ else()
 endif()
 
 create_target_directory_groups(shader_recompiler)
+create_target_directory_groups(shader_util)
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 1c985aff8b..770067d988 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -115,6 +115,7 @@ void EmitContext::DefineConstantBuffers(const Info& info) {
     for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
         const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
         Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("c{}", desc.index));
         std::fill_n(cbufs.data() + desc.index, desc.count, id);
         binding += desc.count;
@@ -143,6 +144,7 @@ void EmitContext::DefineStorageBuffers(const Info& info) {
     for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
         const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
         Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("ssbo{}", binding));
         std::fill_n(ssbos.data() + binding, desc.count, id);
         binding += desc.count;
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 55018332e5..d597184359 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -2,8 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <numeric>
+#include <span>
+#include <tuple>
 #include <type_traits>
+#include <utility>
+#include <vector>
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
@@ -14,10 +17,10 @@
 namespace Shader::Backend::SPIRV {
 namespace {
 template <class Func>
-struct FuncTraits : FuncTraits<decltype(&Func::operator())> {};
+struct FuncTraits : FuncTraits<Func> {};
 
-template <class ClassType, class ReturnType_, class... Args>
-struct FuncTraits<ReturnType_ (ClassType::*)(Args...)> {
+template <class ReturnType_, class... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
     using ReturnType = ReturnType_;
 
     static constexpr size_t NUM_ARGS = sizeof...(Args);
@@ -26,15 +29,15 @@ struct FuncTraits<ReturnType_ (ClassType::*)(Args...)> {
     using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
 };
 
-template <auto method, typename... Args>
-void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) {
+template <auto func, typename... Args>
+void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
     const Id forward_id{inst->Definition<Id>()};
     const bool has_forward_id{Sirit::ValidId(forward_id)};
     Id current_id{};
     if (has_forward_id) {
         current_id = ctx.ExchangeCurrentId(forward_id);
     }
-    const Id new_id{(emit.*method)(ctx, std::forward<Args>(args)...)};
+    const Id new_id{func(ctx, std::forward<Args>(args)...)};
     if (has_forward_id) {
         ctx.ExchangeCurrentId(current_id);
     } else {
@@ -55,42 +58,62 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
     }
 }
 
-template <auto method, bool is_first_arg_inst, size_t... I>
-void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
-    using Traits = FuncTraits<decltype(method)>;
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+    using Traits = FuncTraits<decltype(func)>;
     if constexpr (std::is_same_v<Traits::ReturnType, Id>) {
         if constexpr (is_first_arg_inst) {
-            SetDefinition<method>(emit, ctx, inst, inst,
-                                  Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
+            SetDefinition<func>(ctx, inst, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
         } else {
-            SetDefinition<method>(emit, ctx, inst,
-                                  Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
+            SetDefinition<func>(ctx, inst, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
         }
     } else {
         if constexpr (is_first_arg_inst) {
-            (emit.*method)(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
+            func(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
         } else {
-            (emit.*method)(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
+            func(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
         }
     }
 }
 
-template <auto method>
-void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) {
-    using Traits = FuncTraits<decltype(method)>;
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+    using Traits = FuncTraits<decltype(func)>;
     static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
     if constexpr (Traits::NUM_ARGS == 1) {
-        Invoke<method, false>(emit, ctx, inst, std::make_index_sequence<0>{});
+        Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
     } else {
         using FirstArgType = typename Traits::template ArgType<1>;
         static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
         using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
-        Invoke<method, is_first_arg_inst>(emit, ctx, inst, Indices{});
+        Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+    }
+}
+
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+    switch (inst->Opcode()) {
+#define OPCODE(name, result_type, ...)                                                             \
+    case IR::Opcode::name:                                                                         \
+        return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+    }
+    throw LogicError("Invalid opcode {}", inst->Opcode());
+}
+
+Id TypeId(const EmitContext& ctx, IR::Type type) {
+    switch (type) {
+    case IR::Type::U1:
+        return ctx.U1;
+    case IR::Type::U32:
+        return ctx.U32[1];
+    default:
+        throw NotImplementedException("Phi node type {}", type);
     }
 }
 } // Anonymous namespace
 
-EmitSPIRV::EmitSPIRV(IR::Program& program) {
+std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) {
     EmitContext ctx{program};
     const Id void_function{ctx.TypeFunction(ctx.void_id)};
     // FIXME: Forward declare functions (needs sirit support)
@@ -112,43 +135,17 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) {
     if (program.info.uses_local_invocation_id) {
         interfaces.push_back(ctx.local_invocation_id);
     }
-
     const std::span interfaces_span(interfaces.data(), interfaces.size());
-    ctx.AddEntryPoint(spv::ExecutionModel::Fragment, func, "main", interfaces_span);
-    ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft);
-
-    std::vector<u32> result{ctx.Assemble()};
-    std::FILE* file{std::fopen("D:\\shader.spv", "wb")};
-    std::fwrite(result.data(), sizeof(u32), result.size(), file);
-    std::fclose(file);
-    std::system("spirv-dis D:\\shader.spv") == 0 &&
-        std::system("spirv-val --uniform-buffer-standard-layout D:\\shader.spv") == 0 &&
-        std::system("spirv-cross -V D:\\shader.spv") == 0;
-}
+    ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span);
 
-void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) {
-    switch (inst->Opcode()) {
-#define OPCODE(name, result_type, ...)                                                             \
-    case IR::Opcode::name:                                                                         \
-        return Invoke<&EmitSPIRV::Emit##name>(*this, ctx, inst);
-#include "shader_recompiler/frontend/ir/opcodes.inc"
-#undef OPCODE
-    }
-    throw LogicError("Invalid opcode {}", inst->Opcode());
-}
+    const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
+    ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1],
+                         workgroup_size[2]);
 
-static Id TypeId(const EmitContext& ctx, IR::Type type) {
-    switch (type) {
-    case IR::Type::U1:
-        return ctx.U1;
-    case IR::Type::U32:
-        return ctx.U32[1];
-    default:
-        throw NotImplementedException("Phi node type {}", type);
-    }
+    return ctx.Assemble();
 }
 
-Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) {
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
     const size_t num_args{inst->NumArgs()};
     boost::container::small_vector<Id, 32> operands;
     operands.reserve(num_args * 2);
@@ -178,25 +175,25 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) {
     return ctx.OpPhi(result_type, std::span(operands.data(), operands.size()));
 }
 
-void EmitSPIRV::EmitVoid(EmitContext&) {}
+void EmitVoid(EmitContext&) {}
 
-Id EmitSPIRV::EmitIdentity(EmitContext& ctx, const IR::Value& value) {
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
     return ctx.Def(value);
 }
 
-void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) {
+void EmitGetZeroFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
 }
 
-void EmitSPIRV::EmitGetSignFromOp(EmitContext&) {
+void EmitGetSignFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
 }
 
-void EmitSPIRV::EmitGetCarryFromOp(EmitContext&) {
+void EmitGetCarryFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
 }
 
-void EmitSPIRV::EmitGetOverflowFromOp(EmitContext&) {
+void EmitGetOverflowFromOp(EmitContext&) {
     throw LogicError("Unreachable instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 8bde826137..5813f51ff1 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -8,223 +8,218 @@
 
 #include "common/common_types.h"
 #include "shader_recompiler/backend/spirv/emit_context.h"
+#include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/program.h"
 
 namespace Shader::Backend::SPIRV {
 
-class EmitSPIRV {
-public:
-    explicit EmitSPIRV(IR::Program& program);
+[[nodiscard]] std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program);
 
-private:
-    void EmitInst(EmitContext& ctx, IR::Inst* inst);
-
-    // Microinstruction emitters
-    Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
-    void EmitVoid(EmitContext& ctx);
-    Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
-    void EmitBranch(EmitContext& ctx, IR::Block* label);
-    void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
-                               IR::Block* false_label);
-    void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label);
-    void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label);
-    void EmitReturn(EmitContext& ctx);
-    void EmitGetRegister(EmitContext& ctx);
-    void EmitSetRegister(EmitContext& ctx);
-    void EmitGetPred(EmitContext& ctx);
-    void EmitSetPred(EmitContext& ctx);
-    void EmitSetGotoVariable(EmitContext& ctx);
-    void EmitGetGotoVariable(EmitContext& ctx);
-    Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
-    void EmitGetAttribute(EmitContext& ctx);
-    void EmitSetAttribute(EmitContext& ctx);
-    void EmitGetAttributeIndexed(EmitContext& ctx);
-    void EmitSetAttributeIndexed(EmitContext& ctx);
-    void EmitGetZFlag(EmitContext& ctx);
-    void EmitGetSFlag(EmitContext& ctx);
-    void EmitGetCFlag(EmitContext& ctx);
-    void EmitGetOFlag(EmitContext& ctx);
-    void EmitSetZFlag(EmitContext& ctx);
-    void EmitSetSFlag(EmitContext& ctx);
-    void EmitSetCFlag(EmitContext& ctx);
-    void EmitSetOFlag(EmitContext& ctx);
-    Id EmitWorkgroupId(EmitContext& ctx);
-    Id EmitLocalInvocationId(EmitContext& ctx);
-    Id EmitUndefU1(EmitContext& ctx);
-    Id EmitUndefU8(EmitContext& ctx);
-    Id EmitUndefU16(EmitContext& ctx);
-    Id EmitUndefU32(EmitContext& ctx);
-    Id EmitUndefU64(EmitContext& ctx);
-    void EmitLoadGlobalU8(EmitContext& ctx);
-    void EmitLoadGlobalS8(EmitContext& ctx);
-    void EmitLoadGlobalU16(EmitContext& ctx);
-    void EmitLoadGlobalS16(EmitContext& ctx);
-    void EmitLoadGlobal32(EmitContext& ctx);
-    void EmitLoadGlobal64(EmitContext& ctx);
-    void EmitLoadGlobal128(EmitContext& ctx);
-    void EmitWriteGlobalU8(EmitContext& ctx);
-    void EmitWriteGlobalS8(EmitContext& ctx);
-    void EmitWriteGlobalU16(EmitContext& ctx);
-    void EmitWriteGlobalS16(EmitContext& ctx);
-    void EmitWriteGlobal32(EmitContext& ctx);
-    void EmitWriteGlobal64(EmitContext& ctx);
-    void EmitWriteGlobal128(EmitContext& ctx);
-    void EmitLoadStorageU8(EmitContext& ctx);
-    void EmitLoadStorageS8(EmitContext& ctx);
-    void EmitLoadStorageU16(EmitContext& ctx);
-    void EmitLoadStorageS16(EmitContext& ctx);
-    Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
-    void EmitLoadStorage64(EmitContext& ctx);
-    void EmitLoadStorage128(EmitContext& ctx);
-    void EmitWriteStorageU8(EmitContext& ctx);
-    void EmitWriteStorageS8(EmitContext& ctx);
-    void EmitWriteStorageU16(EmitContext& ctx);
-    void EmitWriteStorageS16(EmitContext& ctx);
-    void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                            Id value);
-    void EmitWriteStorage64(EmitContext& ctx);
-    void EmitWriteStorage128(EmitContext& ctx);
-    void EmitCompositeConstructU32x2(EmitContext& ctx);
-    void EmitCompositeConstructU32x3(EmitContext& ctx);
-    void EmitCompositeConstructU32x4(EmitContext& ctx);
-    void EmitCompositeExtractU32x2(EmitContext& ctx);
-    Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index);
-    void EmitCompositeExtractU32x4(EmitContext& ctx);
-    void EmitCompositeConstructF16x2(EmitContext& ctx);
-    void EmitCompositeConstructF16x3(EmitContext& ctx);
-    void EmitCompositeConstructF16x4(EmitContext& ctx);
-    void EmitCompositeExtractF16x2(EmitContext& ctx);
-    void EmitCompositeExtractF16x3(EmitContext& ctx);
-    void EmitCompositeExtractF16x4(EmitContext& ctx);
-    void EmitCompositeConstructF32x2(EmitContext& ctx);
-    void EmitCompositeConstructF32x3(EmitContext& ctx);
-    void EmitCompositeConstructF32x4(EmitContext& ctx);
-    void EmitCompositeExtractF32x2(EmitContext& ctx);
-    void EmitCompositeExtractF32x3(EmitContext& ctx);
-    void EmitCompositeExtractF32x4(EmitContext& ctx);
-    void EmitCompositeConstructF64x2(EmitContext& ctx);
-    void EmitCompositeConstructF64x3(EmitContext& ctx);
-    void EmitCompositeConstructF64x4(EmitContext& ctx);
-    void EmitCompositeExtractF64x2(EmitContext& ctx);
-    void EmitCompositeExtractF64x3(EmitContext& ctx);
-    void EmitCompositeExtractF64x4(EmitContext& ctx);
-    void EmitSelect8(EmitContext& ctx);
-    void EmitSelect16(EmitContext& ctx);
-    void EmitSelect32(EmitContext& ctx);
-    void EmitSelect64(EmitContext& ctx);
-    void EmitBitCastU16F16(EmitContext& ctx);
-    Id EmitBitCastU32F32(EmitContext& ctx, Id value);
-    void EmitBitCastU64F64(EmitContext& ctx);
-    void EmitBitCastF16U16(EmitContext& ctx);
-    Id EmitBitCastF32U32(EmitContext& ctx, Id value);
-    void EmitBitCastF64U64(EmitContext& ctx);
-    void EmitPackUint2x32(EmitContext& ctx);
-    void EmitUnpackUint2x32(EmitContext& ctx);
-    void EmitPackFloat2x16(EmitContext& ctx);
-    void EmitUnpackFloat2x16(EmitContext& ctx);
-    void EmitPackDouble2x32(EmitContext& ctx);
-    void EmitUnpackDouble2x32(EmitContext& ctx);
-    void EmitGetZeroFromOp(EmitContext& ctx);
-    void EmitGetSignFromOp(EmitContext& ctx);
-    void EmitGetCarryFromOp(EmitContext& ctx);
-    void EmitGetOverflowFromOp(EmitContext& ctx);
-    void EmitFPAbs16(EmitContext& ctx);
-    void EmitFPAbs32(EmitContext& ctx);
-    void EmitFPAbs64(EmitContext& ctx);
-    Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-    Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-    Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-    Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
-    Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
-    Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
-    void EmitFPMax32(EmitContext& ctx);
-    void EmitFPMax64(EmitContext& ctx);
-    void EmitFPMin32(EmitContext& ctx);
-    void EmitFPMin64(EmitContext& ctx);
-    Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-    Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-    Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-    void EmitFPNeg16(EmitContext& ctx);
-    void EmitFPNeg32(EmitContext& ctx);
-    void EmitFPNeg64(EmitContext& ctx);
-    void EmitFPRecip32(EmitContext& ctx);
-    void EmitFPRecip64(EmitContext& ctx);
-    void EmitFPRecipSqrt32(EmitContext& ctx);
-    void EmitFPRecipSqrt64(EmitContext& ctx);
-    void EmitFPSqrt(EmitContext& ctx);
-    void EmitFPSin(EmitContext& ctx);
-    void EmitFPSinNotReduced(EmitContext& ctx);
-    void EmitFPExp2(EmitContext& ctx);
-    void EmitFPExp2NotReduced(EmitContext& ctx);
-    void EmitFPCos(EmitContext& ctx);
-    void EmitFPCosNotReduced(EmitContext& ctx);
-    void EmitFPLog2(EmitContext& ctx);
-    void EmitFPSaturate16(EmitContext& ctx);
-    void EmitFPSaturate32(EmitContext& ctx);
-    void EmitFPSaturate64(EmitContext& ctx);
-    void EmitFPRoundEven16(EmitContext& ctx);
-    void EmitFPRoundEven32(EmitContext& ctx);
-    void EmitFPRoundEven64(EmitContext& ctx);
-    void EmitFPFloor16(EmitContext& ctx);
-    void EmitFPFloor32(EmitContext& ctx);
-    void EmitFPFloor64(EmitContext& ctx);
-    void EmitFPCeil16(EmitContext& ctx);
-    void EmitFPCeil32(EmitContext& ctx);
-    void EmitFPCeil64(EmitContext& ctx);
-    void EmitFPTrunc16(EmitContext& ctx);
-    void EmitFPTrunc32(EmitContext& ctx);
-    void EmitFPTrunc64(EmitContext& ctx);
-    Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-    void EmitIAdd64(EmitContext& ctx);
-    Id EmitISub32(EmitContext& ctx, Id a, Id b);
-    void EmitISub64(EmitContext& ctx);
-    Id EmitIMul32(EmitContext& ctx, Id a, Id b);
-    void EmitINeg32(EmitContext& ctx);
-    void EmitIAbs32(EmitContext& ctx);
-    Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
-    void EmitShiftRightLogical32(EmitContext& ctx);
-    void EmitShiftRightArithmetic32(EmitContext& ctx);
-    void EmitBitwiseAnd32(EmitContext& ctx);
-    void EmitBitwiseOr32(EmitContext& ctx);
-    void EmitBitwiseXor32(EmitContext& ctx);
-    void EmitBitFieldInsert(EmitContext& ctx);
-    void EmitBitFieldSExtract(EmitContext& ctx);
-    Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count);
-    Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
-    void EmitULessThan(EmitContext& ctx);
-    void EmitIEqual(EmitContext& ctx);
-    void EmitSLessThanEqual(EmitContext& ctx);
-    void EmitULessThanEqual(EmitContext& ctx);
-    Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-    void EmitUGreaterThan(EmitContext& ctx);
-    void EmitINotEqual(EmitContext& ctx);
-    void EmitSGreaterThanEqual(EmitContext& ctx);
-    Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-    void EmitLogicalOr(EmitContext& ctx);
-    void EmitLogicalAnd(EmitContext& ctx);
-    void EmitLogicalXor(EmitContext& ctx);
-    void EmitLogicalNot(EmitContext& ctx);
-    void EmitConvertS16F16(EmitContext& ctx);
-    void EmitConvertS16F32(EmitContext& ctx);
-    void EmitConvertS16F64(EmitContext& ctx);
-    void EmitConvertS32F16(EmitContext& ctx);
-    void EmitConvertS32F32(EmitContext& ctx);
-    void EmitConvertS32F64(EmitContext& ctx);
-    void EmitConvertS64F16(EmitContext& ctx);
-    void EmitConvertS64F32(EmitContext& ctx);
-    void EmitConvertS64F64(EmitContext& ctx);
-    void EmitConvertU16F16(EmitContext& ctx);
-    void EmitConvertU16F32(EmitContext& ctx);
-    void EmitConvertU16F64(EmitContext& ctx);
-    void EmitConvertU32F16(EmitContext& ctx);
-    void EmitConvertU32F32(EmitContext& ctx);
-    void EmitConvertU32F64(EmitContext& ctx);
-    void EmitConvertU64F16(EmitContext& ctx);
-    void EmitConvertU64F32(EmitContext& ctx);
-    void EmitConvertU64F64(EmitContext& ctx);
-    void EmitConvertU64U32(EmitContext& ctx);
-    void EmitConvertU32U64(EmitContext& ctx);
-};
+// Microinstruction emitters
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
+void EmitVoid(EmitContext& ctx);
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
+void EmitBranch(EmitContext& ctx, IR::Block* label);
+void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
+                           IR::Block* false_label);
+void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label);
+void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label);
+void EmitReturn(EmitContext& ctx);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+void EmitGetAttribute(EmitContext& ctx);
+void EmitSetAttribute(EmitContext& ctx);
+void EmitGetAttributeIndexed(EmitContext& ctx);
+void EmitSetAttributeIndexed(EmitContext& ctx);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+Id EmitWorkgroupId(EmitContext& ctx);
+Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitUndefU1(EmitContext& ctx);
+Id EmitUndefU8(EmitContext& ctx);
+Id EmitUndefU16(EmitContext& ctx);
+Id EmitUndefU32(EmitContext& ctx);
+Id EmitUndefU64(EmitContext& ctx);
+void EmitLoadGlobalU8(EmitContext& ctx);
+void EmitLoadGlobalS8(EmitContext& ctx);
+void EmitLoadGlobalU16(EmitContext& ctx);
+void EmitLoadGlobalS16(EmitContext& ctx);
+void EmitLoadGlobal32(EmitContext& ctx);
+void EmitLoadGlobal64(EmitContext& ctx);
+void EmitLoadGlobal128(EmitContext& ctx);
+void EmitWriteGlobalU8(EmitContext& ctx);
+void EmitWriteGlobalS8(EmitContext& ctx);
+void EmitWriteGlobalU16(EmitContext& ctx);
+void EmitWriteGlobalS16(EmitContext& ctx);
+void EmitWriteGlobal32(EmitContext& ctx);
+void EmitWriteGlobal64(EmitContext& ctx);
+void EmitWriteGlobal128(EmitContext& ctx);
+void EmitLoadStorageU8(EmitContext& ctx);
+void EmitLoadStorageS8(EmitContext& ctx);
+void EmitLoadStorageU16(EmitContext& ctx);
+void EmitLoadStorageS16(EmitContext& ctx);
+Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+void EmitLoadStorage64(EmitContext& ctx);
+void EmitLoadStorage128(EmitContext& ctx);
+void EmitWriteStorageU8(EmitContext& ctx);
+void EmitWriteStorageS8(EmitContext& ctx);
+void EmitWriteStorageU16(EmitContext& ctx);
+void EmitWriteStorageS16(EmitContext& ctx);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                        Id value);
+void EmitWriteStorage64(EmitContext& ctx);
+void EmitWriteStorage128(EmitContext& ctx);
+void EmitCompositeConstructU32x2(EmitContext& ctx);
+void EmitCompositeConstructU32x3(EmitContext& ctx);
+void EmitCompositeConstructU32x4(EmitContext& ctx);
+void EmitCompositeExtractU32x2(EmitContext& ctx);
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index);
+void EmitCompositeExtractU32x4(EmitContext& ctx);
+void EmitCompositeConstructF16x2(EmitContext& ctx);
+void EmitCompositeConstructF16x3(EmitContext& ctx);
+void EmitCompositeConstructF16x4(EmitContext& ctx);
+void EmitCompositeExtractF16x2(EmitContext& ctx);
+void EmitCompositeExtractF16x3(EmitContext& ctx);
+void EmitCompositeExtractF16x4(EmitContext& ctx);
+void EmitCompositeConstructF32x2(EmitContext& ctx);
+void EmitCompositeConstructF32x3(EmitContext& ctx);
+void EmitCompositeConstructF32x4(EmitContext& ctx);
+void EmitCompositeExtractF32x2(EmitContext& ctx);
+void EmitCompositeExtractF32x3(EmitContext& ctx);
+void EmitCompositeExtractF32x4(EmitContext& ctx);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+void EmitSelect8(EmitContext& ctx);
+void EmitSelect16(EmitContext& ctx);
+void EmitSelect32(EmitContext& ctx);
+void EmitSelect64(EmitContext& ctx);
+void EmitBitCastU16F16(EmitContext& ctx);
+Id EmitBitCastU32F32(EmitContext& ctx, Id value);
+void EmitBitCastU64F64(EmitContext& ctx);
+void EmitBitCastF16U16(EmitContext& ctx);
+Id EmitBitCastF32U32(EmitContext& ctx, Id value);
+void EmitBitCastF64U64(EmitContext& ctx);
+void EmitPackUint2x32(EmitContext& ctx);
+void EmitUnpackUint2x32(EmitContext& ctx);
+void EmitPackFloat2x16(EmitContext& ctx);
+void EmitUnpackFloat2x16(EmitContext& ctx);
+void EmitPackDouble2x32(EmitContext& ctx);
+void EmitUnpackDouble2x32(EmitContext& ctx);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitFPAbs16(EmitContext& ctx);
+void EmitFPAbs32(EmitContext& ctx);
+void EmitFPAbs64(EmitContext& ctx);
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+void EmitFPMax32(EmitContext& ctx);
+void EmitFPMax64(EmitContext& ctx);
+void EmitFPMin32(EmitContext& ctx);
+void EmitFPMin64(EmitContext& ctx);
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+void EmitFPNeg16(EmitContext& ctx);
+void EmitFPNeg32(EmitContext& ctx);
+void EmitFPNeg64(EmitContext& ctx);
+void EmitFPRecip32(EmitContext& ctx);
+void EmitFPRecip64(EmitContext& ctx);
+void EmitFPRecipSqrt32(EmitContext& ctx);
+void EmitFPRecipSqrt64(EmitContext& ctx);
+void EmitFPSqrt(EmitContext& ctx);
+void EmitFPSin(EmitContext& ctx);
+void EmitFPSinNotReduced(EmitContext& ctx);
+void EmitFPExp2(EmitContext& ctx);
+void EmitFPExp2NotReduced(EmitContext& ctx);
+void EmitFPCos(EmitContext& ctx);
+void EmitFPCosNotReduced(EmitContext& ctx);
+void EmitFPLog2(EmitContext& ctx);
+void EmitFPSaturate16(EmitContext& ctx);
+void EmitFPSaturate32(EmitContext& ctx);
+void EmitFPSaturate64(EmitContext& ctx);
+void EmitFPRoundEven16(EmitContext& ctx);
+void EmitFPRoundEven32(EmitContext& ctx);
+void EmitFPRoundEven64(EmitContext& ctx);
+void EmitFPFloor16(EmitContext& ctx);
+void EmitFPFloor32(EmitContext& ctx);
+void EmitFPFloor64(EmitContext& ctx);
+void EmitFPCeil16(EmitContext& ctx);
+void EmitFPCeil32(EmitContext& ctx);
+void EmitFPCeil64(EmitContext& ctx);
+void EmitFPTrunc16(EmitContext& ctx);
+void EmitFPTrunc32(EmitContext& ctx);
+void EmitFPTrunc64(EmitContext& ctx);
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+void EmitIAdd64(EmitContext& ctx);
+Id EmitISub32(EmitContext& ctx, Id a, Id b);
+void EmitISub64(EmitContext& ctx);
+Id EmitIMul32(EmitContext& ctx, Id a, Id b);
+void EmitINeg32(EmitContext& ctx);
+void EmitIAbs32(EmitContext& ctx);
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
+void EmitShiftRightLogical32(EmitContext& ctx);
+void EmitShiftRightArithmetic32(EmitContext& ctx);
+void EmitBitwiseAnd32(EmitContext& ctx);
+void EmitBitwiseOr32(EmitContext& ctx);
+void EmitBitwiseXor32(EmitContext& ctx);
+void EmitBitFieldInsert(EmitContext& ctx);
+void EmitBitFieldSExtract(EmitContext& ctx);
+Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count);
+Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
+void EmitULessThan(EmitContext& ctx);
+void EmitIEqual(EmitContext& ctx);
+void EmitSLessThanEqual(EmitContext& ctx);
+void EmitULessThanEqual(EmitContext& ctx);
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+void EmitUGreaterThan(EmitContext& ctx);
+void EmitINotEqual(EmitContext& ctx);
+void EmitSGreaterThanEqual(EmitContext& ctx);
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+void EmitLogicalOr(EmitContext& ctx);
+void EmitLogicalAnd(EmitContext& ctx);
+void EmitLogicalXor(EmitContext& ctx);
+void EmitLogicalNot(EmitContext& ctx);
+void EmitConvertS16F16(EmitContext& ctx);
+void EmitConvertS16F32(EmitContext& ctx);
+void EmitConvertS16F64(EmitContext& ctx);
+void EmitConvertS32F16(EmitContext& ctx);
+void EmitConvertS32F32(EmitContext& ctx);
+void EmitConvertS32F64(EmitContext& ctx);
+void EmitConvertS64F16(EmitContext& ctx);
+void EmitConvertS64F32(EmitContext& ctx);
+void EmitConvertS64F64(EmitContext& ctx);
+void EmitConvertU16F16(EmitContext& ctx);
+void EmitConvertU16F32(EmitContext& ctx);
+void EmitConvertU16F64(EmitContext& ctx);
+void EmitConvertU32F16(EmitContext& ctx);
+void EmitConvertU32F32(EmitContext& ctx);
+void EmitConvertU32F64(EmitContext& ctx);
+void EmitConvertU64F16(EmitContext& ctx);
+void EmitConvertU64F32(EmitContext& ctx);
+void EmitConvertU64F64(EmitContext& ctx);
+void EmitConvertU64U32(EmitContext& ctx);
+void EmitConvertU32U64(EmitContext& ctx);
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index af82df99ce..49c2004987 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -6,51 +6,51 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitSPIRV::EmitBitCastU16F16(EmitContext&) {
+void EmitBitCastU16F16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) {
+Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
     return ctx.OpBitcast(ctx.U32[1], value);
 }
 
-void EmitSPIRV::EmitBitCastU64F64(EmitContext&) {
+void EmitBitCastU64F64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitBitCastF16U16(EmitContext&) {
+void EmitBitCastF16U16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) {
+Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
     return ctx.OpBitcast(ctx.F32[1], value);
 }
 
-void EmitSPIRV::EmitBitCastF64U64(EmitContext&) {
+void EmitBitCastF64U64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitPackUint2x32(EmitContext&) {
+void EmitPackUint2x32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitUnpackUint2x32(EmitContext&) {
+void EmitUnpackUint2x32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitPackFloat2x16(EmitContext&) {
+void EmitPackFloat2x16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitUnpackFloat2x16(EmitContext&) {
+void EmitUnpackFloat2x16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitPackDouble2x32(EmitContext&) {
+void EmitPackDouble2x32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitUnpackDouble2x32(EmitContext&) {
+void EmitUnpackDouble2x32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
index a7374c89d2..348e4796d5 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -6,99 +6,99 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitSPIRV::EmitCompositeConstructU32x2(EmitContext&) {
+void EmitCompositeConstructU32x2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructU32x3(EmitContext&) {
+void EmitCompositeConstructU32x3(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructU32x4(EmitContext&) {
+void EmitCompositeConstructU32x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) {
+void EmitCompositeExtractU32x2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) {
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) {
     return ctx.OpCompositeExtract(ctx.U32[1], vector, index);
 }
 
-void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) {
+void EmitCompositeExtractU32x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF16x2(EmitContext&) {
+void EmitCompositeConstructF16x2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF16x3(EmitContext&) {
+void EmitCompositeConstructF16x3(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF16x4(EmitContext&) {
+void EmitCompositeConstructF16x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF16x2(EmitContext&) {
+void EmitCompositeExtractF16x2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF16x3(EmitContext&) {
+void EmitCompositeExtractF16x3(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF16x4(EmitContext&) {
+void EmitCompositeExtractF16x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF32x2(EmitContext&) {
+void EmitCompositeConstructF32x2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF32x3(EmitContext&) {
+void EmitCompositeConstructF32x3(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF32x4(EmitContext&) {
+void EmitCompositeConstructF32x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF32x2(EmitContext&) {
+void EmitCompositeExtractF32x2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF32x3(EmitContext&) {
+void EmitCompositeExtractF32x3(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF32x4(EmitContext&) {
+void EmitCompositeExtractF32x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF64x2(EmitContext&) {
+void EmitCompositeConstructF64x2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF64x3(EmitContext&) {
+void EmitCompositeConstructF64x3(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeConstructF64x4(EmitContext&) {
+void EmitCompositeConstructF64x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF64x2(EmitContext&) {
+void EmitCompositeExtractF64x2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF64x3(EmitContext&) {
+void EmitCompositeExtractF64x3(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitCompositeExtractF64x4(EmitContext&) {
+void EmitCompositeExtractF64x4(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index f4c9970ebd..eb9c01c5a5 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -6,31 +6,31 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitSPIRV::EmitGetRegister(EmitContext&) {
+void EmitGetRegister(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetRegister(EmitContext&) {
+void EmitSetRegister(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitGetPred(EmitContext&) {
+void EmitGetPred(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetPred(EmitContext&) {
+void EmitSetPred(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetGotoVariable(EmitContext&) {
+void EmitSetGotoVariable(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitGetGotoVariable(EmitContext&) {
+void EmitGetGotoVariable(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (!binding.IsImmediate()) {
         throw NotImplementedException("Constant buffer indexing");
     }
@@ -43,59 +43,59 @@ Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::
     return ctx.OpLoad(ctx.U32[1], access_chain);
 }
 
-void EmitSPIRV::EmitGetAttribute(EmitContext&) {
+void EmitGetAttribute(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetAttribute(EmitContext&) {
+void EmitSetAttribute(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitGetAttributeIndexed(EmitContext&) {
+void EmitGetAttributeIndexed(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetAttributeIndexed(EmitContext&) {
+void EmitSetAttributeIndexed(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitGetZFlag(EmitContext&) {
+void EmitGetZFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitGetSFlag(EmitContext&) {
+void EmitGetSFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitGetCFlag(EmitContext&) {
+void EmitGetCFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitGetOFlag(EmitContext&) {
+void EmitGetOFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetZFlag(EmitContext&) {
+void EmitSetZFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetSFlag(EmitContext&) {
+void EmitSetSFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetCFlag(EmitContext&) {
+void EmitSetCFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSetOFlag(EmitContext&) {
+void EmitSetOFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) {
+Id EmitWorkgroupId(EmitContext& ctx) {
     return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id);
 }
 
-Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) {
+Id EmitLocalInvocationId(EmitContext& ctx) {
     return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 549c1907a2..6c4199664f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -6,25 +6,25 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Block* label) {
+void EmitBranch(EmitContext& ctx, IR::Block* label) {
     ctx.OpBranch(label->Definition<Id>());
 }
 
-void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
+void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
                                       IR::Block* false_label) {
     ctx.OpBranchConditional(condition, true_label->Definition<Id>(), false_label->Definition<Id>());
 }
 
-void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) {
+void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) {
     ctx.OpLoopMerge(merge_label->Definition<Id>(), continue_label->Definition<Id>(),
                     spv::LoopControlMask::MaskNone);
 }
 
-void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) {
+void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) {
     ctx.OpSelectionMerge(merge_label->Definition<Id>(), spv::SelectionControlMask::MaskNone);
 }
 
-void EmitSPIRV::EmitReturn(EmitContext& ctx) {
+void EmitReturn(EmitContext& ctx) {
     ctx.OpReturn();
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index c9bc121f81..d24fbb353e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -33,187 +33,187 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
 
 } // Anonymous namespace
 
-void EmitSPIRV::EmitFPAbs16(EmitContext&) {
+void EmitFPAbs16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPAbs32(EmitContext&) {
+void EmitFPAbs32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPAbs64(EmitContext&) {
+void EmitFPAbs64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
 }
 
-Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
 }
 
-Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
 }
 
-Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
     return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
 }
 
-Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
     return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
 }
 
-Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
     return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
 }
 
-void EmitSPIRV::EmitFPMax32(EmitContext&) {
+void EmitFPMax32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPMax64(EmitContext&) {
+void EmitFPMax64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPMin32(EmitContext&) {
+void EmitFPMin32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPMin64(EmitContext&) {
+void EmitFPMin64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
 }
 
-Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
 }
 
-Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
 }
 
-void EmitSPIRV::EmitFPNeg16(EmitContext&) {
+void EmitFPNeg16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPNeg32(EmitContext&) {
+void EmitFPNeg32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPNeg64(EmitContext&) {
+void EmitFPNeg64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPRecip32(EmitContext&) {
+void EmitFPRecip32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPRecip64(EmitContext&) {
+void EmitFPRecip64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) {
+void EmitFPRecipSqrt32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) {
+void EmitFPRecipSqrt64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPSqrt(EmitContext&) {
+void EmitFPSqrt(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPSin(EmitContext&) {
+void EmitFPSin(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) {
+void EmitFPSinNotReduced(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPExp2(EmitContext&) {
+void EmitFPExp2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) {
+void EmitFPExp2NotReduced(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPCos(EmitContext&) {
+void EmitFPCos(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) {
+void EmitFPCosNotReduced(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPLog2(EmitContext&) {
+void EmitFPLog2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPSaturate16(EmitContext&) {
+void EmitFPSaturate16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPSaturate32(EmitContext&) {
+void EmitFPSaturate32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPSaturate64(EmitContext&) {
+void EmitFPSaturate64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPRoundEven16(EmitContext&) {
+void EmitFPRoundEven16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPRoundEven32(EmitContext&) {
+void EmitFPRoundEven32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPRoundEven64(EmitContext&) {
+void EmitFPRoundEven64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPFloor16(EmitContext&) {
+void EmitFPFloor16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPFloor32(EmitContext&) {
+void EmitFPFloor32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPFloor64(EmitContext&) {
+void EmitFPFloor64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPCeil16(EmitContext&) {
+void EmitFPCeil16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPCeil32(EmitContext&) {
+void EmitFPCeil32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPCeil64(EmitContext&) {
+void EmitFPCeil64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPTrunc16(EmitContext&) {
+void EmitFPTrunc16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPTrunc32(EmitContext&) {
+void EmitFPTrunc32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitFPTrunc64(EmitContext&) {
+void EmitFPTrunc64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 32af94a736..a1d16b81e4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -6,126 +6,126 @@
 
 namespace Shader::Backend::SPIRV {
 
-Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     if (inst->HasAssociatedPseudoOperation()) {
         throw NotImplementedException("Pseudo-operations on IAdd32");
     }
     return ctx.OpIAdd(ctx.U32[1], a, b);
 }
 
-void EmitSPIRV::EmitIAdd64(EmitContext&) {
+void EmitIAdd64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) {
+Id EmitISub32(EmitContext& ctx, Id a, Id b) {
     return ctx.OpISub(ctx.U32[1], a, b);
 }
 
-void EmitSPIRV::EmitISub64(EmitContext&) {
+void EmitISub64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) {
+Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
     return ctx.OpIMul(ctx.U32[1], a, b);
 }
 
-void EmitSPIRV::EmitINeg32(EmitContext&) {
+void EmitINeg32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitIAbs32(EmitContext&) {
+void EmitIAbs32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
     return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift);
 }
 
-void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) {
+void EmitShiftRightLogical32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitShiftRightArithmetic32(EmitContext&) {
+void EmitShiftRightArithmetic32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitBitwiseAnd32(EmitContext&) {
+void EmitBitwiseAnd32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitBitwiseOr32(EmitContext&) {
+void EmitBitwiseOr32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitBitwiseXor32(EmitContext&) {
+void EmitBitwiseXor32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitBitFieldInsert(EmitContext&) {
+void EmitBitFieldInsert(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) {
+void EmitBitFieldSExtract(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) {
+Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) {
     return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count);
 }
 
-Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSLessThan(ctx.U1, lhs, rhs);
 }
 
-void EmitSPIRV::EmitULessThan(EmitContext&) {
+void EmitULessThan(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitIEqual(EmitContext&) {
+void EmitIEqual(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSLessThanEqual(EmitContext&) {
+void EmitSLessThanEqual(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitULessThanEqual(EmitContext&) {
+void EmitULessThanEqual(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSGreaterThan(ctx.U1, lhs, rhs);
 }
 
-void EmitSPIRV::EmitUGreaterThan(EmitContext&) {
+void EmitUGreaterThan(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitINotEqual(EmitContext&) {
+void EmitINotEqual(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) {
+void EmitSGreaterThanEqual(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs);
 }
 
-void EmitSPIRV::EmitLogicalOr(EmitContext&) {
+void EmitLogicalOr(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLogicalAnd(EmitContext&) {
+void EmitLogicalAnd(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLogicalXor(EmitContext&) {
+void EmitLogicalXor(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLogicalNot(EmitContext&) {
+void EmitLogicalNot(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
index 7b43c4ed80..ff2f4fb744 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
@@ -6,83 +6,83 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitSPIRV::EmitConvertS16F16(EmitContext&) {
+void EmitConvertS16F16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertS16F32(EmitContext&) {
+void EmitConvertS16F32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertS16F64(EmitContext&) {
+void EmitConvertS16F64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertS32F16(EmitContext&) {
+void EmitConvertS32F16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertS32F32(EmitContext&) {
+void EmitConvertS32F32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertS32F64(EmitContext&) {
+void EmitConvertS32F64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertS64F16(EmitContext&) {
+void EmitConvertS64F16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertS64F32(EmitContext&) {
+void EmitConvertS64F32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertS64F64(EmitContext&) {
+void EmitConvertS64F64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU16F16(EmitContext&) {
+void EmitConvertU16F16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU16F32(EmitContext&) {
+void EmitConvertU16F32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU16F64(EmitContext&) {
+void EmitConvertU16F64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU32F16(EmitContext&) {
+void EmitConvertU32F16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU32F32(EmitContext&) {
+void EmitConvertU32F32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU32F64(EmitContext&) {
+void EmitConvertU32F64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU64F16(EmitContext&) {
+void EmitConvertU64F16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU64F32(EmitContext&) {
+void EmitConvertU64F32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU64F64(EmitContext&) {
+void EmitConvertU64F64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU64U32(EmitContext&) {
+void EmitConvertU64U32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitConvertU32U64(EmitContext&) {
+void EmitConvertU32U64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
index 5769a3c95b..77d698ffd4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -22,79 +22,79 @@ static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element
     return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
 }
 
-void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) {
+void EmitLoadGlobalU8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadGlobalS8(EmitContext&) {
+void EmitLoadGlobalS8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadGlobalU16(EmitContext&) {
+void EmitLoadGlobalU16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadGlobalS16(EmitContext&) {
+void EmitLoadGlobalS16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadGlobal32(EmitContext&) {
+void EmitLoadGlobal32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadGlobal64(EmitContext&) {
+void EmitLoadGlobal64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadGlobal128(EmitContext&) {
+void EmitLoadGlobal128(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteGlobalU8(EmitContext&) {
+void EmitWriteGlobalU8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteGlobalS8(EmitContext&) {
+void EmitWriteGlobalS8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteGlobalU16(EmitContext&) {
+void EmitWriteGlobalU16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteGlobalS16(EmitContext&) {
+void EmitWriteGlobalS16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteGlobal32(EmitContext&) {
+void EmitWriteGlobal32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteGlobal64(EmitContext&) {
+void EmitWriteGlobal64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteGlobal128(EmitContext&) {
+void EmitWriteGlobal128(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadStorageU8(EmitContext&) {
+void EmitLoadStorageU8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadStorageS8(EmitContext&) {
+void EmitLoadStorageS8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadStorageU16(EmitContext&) {
+void EmitLoadStorageU16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadStorageS16(EmitContext&) {
+void EmitLoadStorageS16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding,
+Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding,
                                 const IR::Value& offset) {
     if (!binding.IsImmediate()) {
         throw NotImplementedException("Dynamic storage buffer indexing");
@@ -105,31 +105,31 @@ Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding,
     return ctx.OpLoad(ctx.U32[1], pointer);
 }
 
-void EmitSPIRV::EmitLoadStorage64(EmitContext&) {
+void EmitLoadStorage64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitLoadStorage128(EmitContext&) {
+void EmitLoadStorage128(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteStorageU8(EmitContext&) {
+void EmitWriteStorageU8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteStorageS8(EmitContext&) {
+void EmitWriteStorageS8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteStorageU16(EmitContext&) {
+void EmitWriteStorageU16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteStorageS16(EmitContext&) {
+void EmitWriteStorageS16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding,
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding,
                                    const IR::Value& offset, Id value) {
     if (!binding.IsImmediate()) {
         throw NotImplementedException("Dynamic storage buffer indexing");
@@ -140,11 +140,11 @@ void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding,
     ctx.OpStore(pointer, value);
 }
 
-void EmitSPIRV::EmitWriteStorage64(EmitContext&) {
+void EmitWriteStorage64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitWriteStorage128(EmitContext&) {
+void EmitWriteStorage128(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
index 40a856f72a..8d50627247 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -6,19 +6,19 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitSPIRV::EmitSelect8(EmitContext&) {
+void EmitSelect8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSelect16(EmitContext&) {
+void EmitSelect16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSelect32(EmitContext&) {
+void EmitSelect32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSPIRV::EmitSelect64(EmitContext&) {
+void EmitSelect64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
index c1ed8f281f..19b06dbe49 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
@@ -6,23 +6,23 @@
 
 namespace Shader::Backend::SPIRV {
 
-Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) {
+Id EmitUndefU1(EmitContext& ctx) {
     return ctx.OpUndef(ctx.U1);
 }
 
-Id EmitSPIRV::EmitUndefU8(EmitContext&) {
+Id EmitUndefU8(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitUndefU16(EmitContext&) {
+Id EmitUndefU16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) {
+Id EmitUndefU32(EmitContext& ctx) {
     return ctx.OpUndef(ctx.U32[1]);
 }
 
-Id EmitSPIRV::EmitUndefU64(EmitContext&) {
+Id EmitUndefU64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index f6230e8171..0ba681fb96 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <array>
+
 #include "common/common_types.h"
 
 namespace Shader {
@@ -8,7 +10,9 @@ class Environment {
 public:
     virtual ~Environment() = default;
 
-    [[nodiscard]] virtual u64 ReadInstruction(u32 address) const = 0;
+    [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
+
+    [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0;
 };
 
 } // namespace Shader
diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp
index b34bf462b6..5127523f97 100644
--- a/src/shader_recompiler/file_environment.cpp
+++ b/src/shader_recompiler/file_environment.cpp
@@ -29,7 +29,7 @@ FileEnvironment::FileEnvironment(const char* path) {
 
 FileEnvironment::~FileEnvironment() = default;
 
-u64 FileEnvironment::ReadInstruction(u32 offset) const {
+u64 FileEnvironment::ReadInstruction(u32 offset) {
     if (offset % 8 != 0) {
         throw InvalidArgument("offset={} is not aligned to 8", offset);
     }
@@ -39,4 +39,8 @@ u64 FileEnvironment::ReadInstruction(u32 offset) const {
     return data[offset / 8];
 }
 
+std::array<u32, 3> FileEnvironment::WorkgroupSize() {
+    return {1, 1, 1};
+}
+
 } // namespace Shader
diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h
index c294bc6faf..b8c4bbadd9 100644
--- a/src/shader_recompiler/file_environment.h
+++ b/src/shader_recompiler/file_environment.h
@@ -12,7 +12,9 @@ public:
     explicit FileEnvironment(const char* path);
     ~FileEnvironment() override;
 
-    u64 ReadInstruction(u32 offset) const override;
+    u64 ReadInstruction(u32 offset) override;
+
+    std::array<u32, 3> WorkgroupSize() override;
 
 private:
     std::vector<u64> data;
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index 5ae91dd7dc..ec029dfd6e 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -127,6 +127,8 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind
         return fmt::format("#{}", arg.U32());
     case Type::U64:
         return fmt::format("#{}", arg.U64());
+    case Type::F32:
+        return fmt::format("#{}", arg.F32());
     case Type::Reg:
         return fmt::format("{}", arg.Reg());
     case Type::Pred:
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp
index a48b8dec5a..8709a2ea1e 100644
--- a/src/shader_recompiler/frontend/ir/post_order.cpp
+++ b/src/shader_recompiler/frontend/ir/post_order.cpp
@@ -28,7 +28,7 @@ BlockList PostOrder(const BlockList& blocks) {
         if (!visited.insert(branch).second) {
             return false;
         }
-        // Calling push_back twice is faster than insert on msvc
+        // Calling push_back twice is faster than insert on MSVC
         block_stack.push_back(block);
         block_stack.push_back(branch);
         return true;
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 8331d576c1..8c44ebb29d 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -69,7 +69,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
         Optimization::VerificationPass(function);
     }
     Optimization::CollectShaderInfoPass(program);
-    //*/
+    fmt::print(stdout, "{}\n", IR::DumpProgram(program));
     return program;
 }
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 3c9eaddd94..079e3497f2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -24,6 +24,14 @@ void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
     X(dest_reg, ir.BitCast<IR::U32>(value));
 }
 
+IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> index;
+    } const reg{insn};
+    return X(reg.index);
+}
+
 IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
     union {
         u64 raw;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index b701605d73..8bd468244e 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -301,6 +301,7 @@ public:
     void X(IR::Reg dest_reg, const IR::U32& value);
     void F(IR::Reg dest_reg, const IR::F32& value);
 
+    [[nodiscard]] IR::U32 GetReg8(u64 insn);
     [[nodiscard]] IR::U32 GetReg20(u64 insn);
     [[nodiscard]] IR::U32 GetReg39(u64 insn);
     [[nodiscard]] IR::F32 GetReg20F(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
index 1f83d10683..c3c4b9abd2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -10,36 +10,35 @@
 
 namespace Shader::Maxwell {
 namespace {
-union MOV {
-    u64 raw;
-    BitField<0, 8, IR::Reg> dest_reg;
-    BitField<20, 8, IR::Reg> src_reg;
-    BitField<39, 4, u64> mask;
-};
-
-void CheckMask(MOV mov) {
-    if (mov.mask != 0xf) {
+void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<39, 4, u64> mask;
+        BitField<12, 4, u64> mov32i_mask;
+    } const mov{insn};
+
+    if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
         throw NotImplementedException("Non-full move mask");
     }
+    v.X(mov.dest_reg, src);
 }
 } // Anonymous namespace
 
 void TranslatorVisitor::MOV_reg(u64 insn) {
-    const MOV mov{insn};
-    CheckMask(mov);
-    X(mov.dest_reg, X(mov.src_reg));
+    MOV(*this, insn, GetReg8(insn));
 }
 
 void TranslatorVisitor::MOV_cbuf(u64 insn) {
-    const MOV mov{insn};
-    CheckMask(mov);
-    X(mov.dest_reg, GetCbuf(insn));
+    MOV(*this, insn, GetCbuf(insn));
 }
 
 void TranslatorVisitor::MOV_imm(u64 insn) {
-    const MOV mov{insn};
-    CheckMask(mov);
-    X(mov.dest_reg, GetImm20(insn));
+    MOV(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::MOV32I(u64 insn) {
+    MOV(*this, insn, GetImm32(insn), true);
 }
 
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 1bb160acbc..6b2a1356bc 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -617,10 +617,6 @@ void TranslatorVisitor::MEMBAR(u64) {
     ThrowNotImplemented(Opcode::MEMBAR);
 }
 
-void TranslatorVisitor::MOV32I(u64) {
-    ThrowNotImplemented(Opcode::MOV32I);
-}
-
 void TranslatorVisitor::NOP(u64) {
     ThrowNotImplemented(Opcode::NOP);
 }
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index 1610bb34e1..050a37f180 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -76,5 +76,5 @@ int main() {
     fmt::print(stdout, "{}\n", cfg.Dot());
     IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)};
     fmt::print(stdout, "{}\n", IR::DumpProgram(program));
-    Backend::SPIRV::EmitSPIRV spirv{program};
+    void(Backend::SPIRV::EmitSPIRV(env, program));
 }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
new file mode 100644
index 0000000000..c96d783b77
--- /dev/null
+++ b/src/shader_recompiler/profile.h
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Shader {
+
+struct Profile {
+    bool unified_descriptor_binding;
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
new file mode 100644
index 0000000000..b25081e39b
--- /dev/null
+++ b/src/shader_recompiler/recompiler.cpp
@@ -0,0 +1,27 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "common/common_types.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/program.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/recompiler.h"
+
+namespace Shader {
+
+std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address) {
+    ObjectPool<Maxwell::Flow::Block> flow_block_pool;
+    ObjectPool<IR::Inst> inst_pool;
+    ObjectPool<IR::Block> block_pool;
+
+    Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
+    IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
+    return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)};
+}
+
+} // namespace Shader
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h
new file mode 100644
index 0000000000..4cb9738782
--- /dev/null
+++ b/src/shader_recompiler/recompiler.h
@@ -0,0 +1,18 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader {
+
+[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address);
+
+} // namespace Shader
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c5ce71706a..3323e69169 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -43,9 +43,6 @@ add_library(video_core STATIC
     engines/maxwell_3d.h
     engines/maxwell_dma.cpp
     engines/maxwell_dma.h
-    engines/shader_bytecode.h
-    engines/shader_header.h
-    engines/shader_type.h
     framebuffer_config.h
     macro/macro.cpp
     macro/macro.h
@@ -123,6 +120,7 @@ add_library(video_core STATIC
     renderer_vulkan/vk_master_semaphore.h
     renderer_vulkan/vk_pipeline_cache.cpp
     renderer_vulkan/vk_pipeline_cache.h
+    renderer_vulkan/vk_pipeline.h
     renderer_vulkan/vk_query_cache.cpp
     renderer_vulkan/vk_query_cache.h
     renderer_vulkan/vk_rasterizer.cpp
@@ -201,7 +199,7 @@ add_library(video_core STATIC
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad xbyak)
+target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak)
 
 if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
     add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 0d7683c2d5..f8b8d06ac7 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -12,7 +12,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/engine_interface.h"
 #include "video_core/engines/engine_upload.h"
-#include "video_core/engines/shader_type.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/texture.h"
 
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
deleted file mode 100644
index 8b45f1b623..0000000000
--- a/src/video_core/engines/shader_bytecode.h
+++ /dev/null
@@ -1,2298 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <bitset>
-#include <optional>
-#include <tuple>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/bit_field.h"
-#include "common/common_types.h"
-
-namespace Tegra::Shader {
-
-struct Register {
-    /// Number of registers
-    static constexpr std::size_t NumRegisters = 256;
-
-    /// Register 255 is special cased to always be 0
-    static constexpr std::size_t ZeroIndex = 255;
-
-    enum class Size : u64 {
-        Byte = 0,
-        Short = 1,
-        Word = 2,
-        Long = 3,
-    };
-
-    constexpr Register() = default;
-
-    constexpr Register(u64 value_) : value(value_) {}
-
-    [[nodiscard]] constexpr operator u64() const {
-        return value;
-    }
-
-    template <typename T>
-    [[nodiscard]] constexpr u64 operator-(const T& oth) const {
-        return value - oth;
-    }
-
-    template <typename T>
-    [[nodiscard]] constexpr u64 operator&(const T& oth) const {
-        return value & oth;
-    }
-
-    [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
-        return value & oth.value;
-    }
-
-    [[nodiscard]] constexpr u64 operator~() const {
-        return ~value;
-    }
-
-    [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
-        elem = (value + elem) & 3;
-        return (value & ~3) + elem;
-    }
-
-private:
-    u64 value{};
-};
-
-enum class AttributeSize : u64 {
-    Word = 0,
-    DoubleWord = 1,
-    TripleWord = 2,
-    QuadWord = 3,
-};
-
-union Attribute {
-    Attribute() = default;
-
-    constexpr explicit Attribute(u64 value_) : value(value_) {}
-
-    enum class Index : u64 {
-        LayerViewportPointSize = 6,
-        Position = 7,
-        Attribute_0 = 8,
-        Attribute_31 = 39,
-        FrontColor = 40,
-        FrontSecondaryColor = 41,
-        BackColor = 42,
-        BackSecondaryColor = 43,
-        ClipDistances0123 = 44,
-        ClipDistances4567 = 45,
-        PointCoord = 46,
-        // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
-        // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
-        // shader.
-        TessCoordInstanceIDVertexID = 47,
-        TexCoord_0 = 48,
-        TexCoord_7 = 55,
-        // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
-        // shader. It is unknown what the other values contain.
-        FrontFacing = 63,
-    };
-
-    union {
-        BitField<20, 10, u64> immediate;
-        BitField<22, 2, u64> element;
-        BitField<24, 6, Index> index;
-        BitField<31, 1, u64> patch;
-        BitField<47, 3, AttributeSize> size;
-
-        [[nodiscard]] bool IsPhysical() const {
-            return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
-        }
-    } fmt20;
-
-    union {
-        BitField<30, 2, u64> element;
-        BitField<32, 6, Index> index;
-    } fmt28;
-
-    BitField<39, 8, u64> reg;
-    u64 value{};
-};
-
-union Sampler {
-    Sampler() = default;
-
-    constexpr explicit Sampler(u64 value_) : value(value_) {}
-
-    enum class Index : u64 {
-        Sampler_0 = 8,
-    };
-
-    BitField<36, 13, Index> index;
-    u64 value{};
-};
-
-union Image {
-    Image() = default;
-
-    constexpr explicit Image(u64 value_) : value{value_} {}
-
-    BitField<36, 13, u64> index;
-    u64 value;
-};
-
-} // namespace Tegra::Shader
-
-namespace std {
-
-// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
-template <>
-struct make_unsigned<Tegra::Shader::Attribute> {
-    using type = Tegra::Shader::Attribute;
-};
-
-template <>
-struct make_unsigned<Tegra::Shader::Register> {
-    using type = Tegra::Shader::Register;
-};
-
-} // namespace std
-
-namespace Tegra::Shader {
-
-enum class Pred : u64 {
-    UnusedIndex = 0x7,
-    NeverExecute = 0xF,
-};
-
-enum class PredCondition : u64 {
-    F = 0,    // Always false
-    LT = 1,   // Ordered less than
-    EQ = 2,   // Ordered equal
-    LE = 3,   // Ordered less than or equal
-    GT = 4,   // Ordered greater than
-    NE = 5,   // Ordered not equal
-    GE = 6,   // Ordered greater than or equal
-    NUM = 7,  // Ordered
-    NAN_ = 8, // Unordered
-    LTU = 9,  // Unordered less than
-    EQU = 10, // Unordered equal
-    LEU = 11, // Unordered less than or equal
-    GTU = 12, // Unordered greater than
-    NEU = 13, // Unordered not equal
-    GEU = 14, // Unordered greater than or equal
-    T = 15,   // Always true
-};
-
-enum class PredOperation : u64 {
-    And = 0,
-    Or = 1,
-    Xor = 2,
-};
-
-enum class LogicOperation : u64 {
-    And = 0,
-    Or = 1,
-    Xor = 2,
-    PassB = 3,
-};
-
-enum class SubOp : u64 {
-    Cos = 0x0,
-    Sin = 0x1,
-    Ex2 = 0x2,
-    Lg2 = 0x3,
-    Rcp = 0x4,
-    Rsq = 0x5,
-    Sqrt = 0x8,
-};
-
-enum class F2iRoundingOp : u64 {
-    RoundEven = 0,
-    Floor = 1,
-    Ceil = 2,
-    Trunc = 3,
-};
-
-enum class F2fRoundingOp : u64 {
-    None = 0,
-    Pass = 3,
-    Round = 8,
-    Floor = 9,
-    Ceil = 10,
-    Trunc = 11,
-};
-
-enum class AtomicOp : u64 {
-    Add = 0,
-    Min = 1,
-    Max = 2,
-    Inc = 3,
-    Dec = 4,
-    And = 5,
-    Or = 6,
-    Xor = 7,
-    Exch = 8,
-    SafeAdd = 10,
-};
-
-enum class GlobalAtomicType : u64 {
-    U32 = 0,
-    S32 = 1,
-    U64 = 2,
-    F32_FTZ_RN = 3,
-    F16x2_FTZ_RN = 4,
-    S64 = 5,
-};
-
-enum class UniformType : u64 {
-    UnsignedByte = 0,
-    SignedByte = 1,
-    UnsignedShort = 2,
-    SignedShort = 3,
-    Single = 4,
-    Double = 5,
-    Quad = 6,
-    UnsignedQuad = 7,
-};
-
-enum class StoreType : u64 {
-    Unsigned8 = 0,
-    Signed8 = 1,
-    Unsigned16 = 2,
-    Signed16 = 3,
-    Bits32 = 4,
-    Bits64 = 5,
-    Bits128 = 6,
-};
-
-enum class AtomicType : u64 {
-    U32 = 0,
-    S32 = 1,
-    U64 = 2,
-    S64 = 3,
-};
-
-enum class IMinMaxExchange : u64 {
-    None = 0,
-    XLo = 1,
-    XMed = 2,
-    XHi = 3,
-};
-
-enum class VideoType : u64 {
-    Size16_Low = 0,
-    Size16_High = 1,
-    Size32 = 2,
-    Invalid = 3,
-};
-
-enum class VmadShr : u64 {
-    Shr7 = 1,
-    Shr15 = 2,
-};
-
-enum class VmnmxType : u64 {
-    Bits8,
-    Bits16,
-    Bits32,
-};
-
-enum class VmnmxOperation : u64 {
-    Mrg_16H = 0,
-    Mrg_16L = 1,
-    Mrg_8B0 = 2,
-    Mrg_8B2 = 3,
-    Acc = 4,
-    Min = 5,
-    Max = 6,
-    Nop = 7,
-};
-
-enum class XmadMode : u64 {
-    None = 0,
-    CLo = 1,
-    CHi = 2,
-    CSfu = 3,
-    CBcc = 4,
-};
-
-enum class IAdd3Mode : u64 {
-    None = 0,
-    RightShift = 1,
-    LeftShift = 2,
-};
-
-enum class IAdd3Height : u64 {
-    None = 0,
-    LowerHalfWord = 1,
-    UpperHalfWord = 2,
-};
-
-enum class FlowCondition : u64 {
-    Always = 0xF,
-    Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
-};
-
-enum class ConditionCode : u64 {
-    F = 0,
-    LT = 1,
-    EQ = 2,
-    LE = 3,
-    GT = 4,
-    NE = 5,
-    GE = 6,
-    Num = 7,
-    Nan = 8,
-    LTU = 9,
-    EQU = 10,
-    LEU = 11,
-    GTU = 12,
-    NEU = 13,
-    GEU = 14,
-    T = 15,
-    OFF = 16,
-    LO = 17,
-    SFF = 18,
-    LS = 19,
-    HI = 20,
-    SFT = 21,
-    HS = 22,
-    OFT = 23,
-    CSM_TA = 24,
-    CSM_TR = 25,
-    CSM_MX = 26,
-    FCSM_TA = 27,
-    FCSM_TR = 28,
-    FCSM_MX = 29,
-    RLE = 30,
-    RGT = 31,
-};
-
-enum class PredicateResultMode : u64 {
-    None = 0x0,
-    NotZero = 0x3,
-};
-
-enum class TextureType : u64 {
-    Texture1D = 0,
-    Texture2D = 1,
-    Texture3D = 2,
-    TextureCube = 3,
-};
-
-enum class TextureQueryType : u64 {
-    Dimension = 1,
-    TextureType = 2,
-    SamplePosition = 5,
-    Filter = 16,
-    LevelOfDetail = 18,
-    Wrap = 20,
-    BorderColor = 22,
-};
-
-enum class TextureProcessMode : u64 {
-    None = 0,
-    LZ = 1,  // Load LOD of zero.
-    LB = 2,  // Load Bias.
-    LL = 3,  // Load LOD.
-    LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
-    LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL.
-};
-
-enum class TextureMiscMode : u64 {
-    DC,
-    AOFFI, // Uses Offset
-    NDV,
-    NODEP,
-    MZ,
-    PTP,
-};
-
-enum class SurfaceDataMode : u64 {
-    P = 0,
-    D_BA = 1,
-};
-
-enum class OutOfBoundsStore : u64 {
-    Ignore = 0,
-    Clamp = 1,
-    Trap = 2,
-};
-
-enum class ImageType : u64 {
-    Texture1D = 0,
-    TextureBuffer = 1,
-    Texture1DArray = 2,
-    Texture2D = 3,
-    Texture2DArray = 4,
-    Texture3D = 5,
-};
-
-enum class IsberdMode : u64 {
-    None = 0,
-    Patch = 1,
-    Prim = 2,
-    Attr = 3,
-};
-
-enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
-
-enum class MembarType : u64 {
-    CTA = 0,
-    GL = 1,
-    SYS = 2,
-    VC = 3,
-};
-
-enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
-
-enum class HalfType : u64 {
-    H0_H1 = 0,
-    F32 = 1,
-    H0_H0 = 2,
-    H1_H1 = 3,
-};
-
-enum class HalfMerge : u64 {
-    H0_H1 = 0,
-    F32 = 1,
-    Mrg_H0 = 2,
-    Mrg_H1 = 3,
-};
-
-enum class HalfPrecision : u64 {
-    None = 0,
-    FTZ = 1,
-    FMZ = 2,
-};
-
-enum class R2pMode : u64 {
-    Pr = 0,
-    Cc = 1,
-};
-
-enum class IpaInterpMode : u64 {
-    Pass = 0,
-    Multiply = 1,
-    Constant = 2,
-    Sc = 3,
-};
-
-enum class IpaSampleMode : u64 {
-    Default = 0,
-    Centroid = 1,
-    Offset = 2,
-};
-
-enum class LmemLoadCacheManagement : u64 {
-    Default = 0,
-    LU = 1,
-    CI = 2,
-    CV = 3,
-};
-
-enum class StoreCacheManagement : u64 {
-    Default = 0,
-    CG = 1,
-    CS = 2,
-    WT = 3,
-};
-
-struct IpaMode {
-    IpaInterpMode interpolation_mode;
-    IpaSampleMode sampling_mode;
-
-    [[nodiscard]] bool operator==(const IpaMode& a) const {
-        return std::tie(interpolation_mode, sampling_mode) ==
-               std::tie(a.interpolation_mode, a.sampling_mode);
-    }
-    [[nodiscard]] bool operator!=(const IpaMode& a) const {
-        return !operator==(a);
-    }
-    [[nodiscard]] bool operator<(const IpaMode& a) const {
-        return std::tie(interpolation_mode, sampling_mode) <
-               std::tie(a.interpolation_mode, a.sampling_mode);
-    }
-};
-
-enum class SystemVariable : u64 {
-    LaneId = 0x00,
-    VirtCfg = 0x02,
-    VirtId = 0x03,
-    Pm0 = 0x04,
-    Pm1 = 0x05,
-    Pm2 = 0x06,
-    Pm3 = 0x07,
-    Pm4 = 0x08,
-    Pm5 = 0x09,
-    Pm6 = 0x0a,
-    Pm7 = 0x0b,
-    OrderingTicket = 0x0f,
-    PrimType = 0x10,
-    InvocationId = 0x11,
-    Ydirection = 0x12,
-    ThreadKill = 0x13,
-    ShaderType = 0x14,
-    DirectBeWriteAddressLow = 0x15,
-    DirectBeWriteAddressHigh = 0x16,
-    DirectBeWriteEnabled = 0x17,
-    MachineId0 = 0x18,
-    MachineId1 = 0x19,
-    MachineId2 = 0x1a,
-    MachineId3 = 0x1b,
-    Affinity = 0x1c,
-    InvocationInfo = 0x1d,
-    WscaleFactorXY = 0x1e,
-    WscaleFactorZ = 0x1f,
-    Tid = 0x20,
-    TidX = 0x21,
-    TidY = 0x22,
-    TidZ = 0x23,
-    CtaParam = 0x24,
-    CtaIdX = 0x25,
-    CtaIdY = 0x26,
-    CtaIdZ = 0x27,
-    NtId = 0x28,
-    CirQueueIncrMinusOne = 0x29,
-    Nlatc = 0x2a,
-    SmSpaVersion = 0x2c,
-    MultiPassShaderInfo = 0x2d,
-    LwinHi = 0x2e,
-    SwinHi = 0x2f,
-    SwinLo = 0x30,
-    SwinSz = 0x31,
-    SmemSz = 0x32,
-    SmemBanks = 0x33,
-    LwinLo = 0x34,
-    LwinSz = 0x35,
-    LmemLosz = 0x36,
-    LmemHioff = 0x37,
-    EqMask = 0x38,
-    LtMask = 0x39,
-    LeMask = 0x3a,
-    GtMask = 0x3b,
-    GeMask = 0x3c,
-    RegAlloc = 0x3d,
-    CtxAddr = 0x3e,      // .fmask = F_SM50
-    BarrierAlloc = 0x3e, // .fmask = F_SM60
-    GlobalErrorStatus = 0x40,
-    WarpErrorStatus = 0x42,
-    WarpErrorStatusClear = 0x43,
-    PmHi0 = 0x48,
-    PmHi1 = 0x49,
-    PmHi2 = 0x4a,
-    PmHi3 = 0x4b,
-    PmHi4 = 0x4c,
-    PmHi5 = 0x4d,
-    PmHi6 = 0x4e,
-    PmHi7 = 0x4f,
-    ClockLo = 0x50,
-    ClockHi = 0x51,
-    GlobalTimerLo = 0x52,
-    GlobalTimerHi = 0x53,
-    HwTaskId = 0x60,
-    CircularQueueEntryIndex = 0x61,
-    CircularQueueEntryAddressLow = 0x62,
-    CircularQueueEntryAddressHigh = 0x63,
-};
-
-enum class PhysicalAttributeDirection : u64 {
-    Input = 0,
-    Output = 1,
-};
-
-enum class VoteOperation : u64 {
-    All = 0, // allThreadsNV
-    Any = 1, // anyThreadNV
-    Eq = 2,  // allThreadsEqualNV
-};
-
-enum class ImageAtomicOperationType : u64 {
-    U32 = 0,
-    S32 = 1,
-    U64 = 2,
-    F32 = 3,
-    S64 = 5,
-    SD32 = 6,
-    SD64 = 7,
-};
-
-enum class ImageAtomicOperation : u64 {
-    Add = 0,
-    Min = 1,
-    Max = 2,
-    Inc = 3,
-    Dec = 4,
-    And = 5,
-    Or = 6,
-    Xor = 7,
-    Exch = 8,
-};
-
-enum class ShuffleOperation : u64 {
-    Idx = 0,  // shuffleNV
-    Up = 1,   // shuffleUpNV
-    Down = 2, // shuffleDownNV
-    Bfly = 3, // shuffleXorNV
-};
-
-enum class ShfType : u64 {
-    Bits32 = 0,
-    U64 = 2,
-    S64 = 3,
-};
-
-enum class ShfXmode : u64 {
-    None = 0,
-    HI = 1,
-    X = 2,
-    XHI = 3,
-};
-
-union Instruction {
-    constexpr Instruction& operator=(const Instruction& instr) {
-        value = instr.value;
-        return *this;
-    }
-
-    constexpr Instruction(u64 value_) : value{value_} {}
-    constexpr Instruction(const Instruction& instr) : value(instr.value) {}
-
-    [[nodiscard]] constexpr bool Bit(u64 offset) const {
-        return ((value >> offset) & 1) != 0;
-    }
-
-    BitField<0, 8, Register> gpr0;
-    BitField<8, 8, Register> gpr8;
-    union {
-        BitField<16, 4, Pred> full_pred;
-        BitField<16, 3, u64> pred_index;
-    } pred;
-    BitField<19, 1, u64> negate_pred;
-    BitField<20, 8, Register> gpr20;
-    BitField<20, 4, SubOp> sub_op;
-    BitField<28, 8, Register> gpr28;
-    BitField<39, 8, Register> gpr39;
-    BitField<48, 16, u64> opcode;
-
-    union {
-        BitField<8, 5, ConditionCode> cc;
-        BitField<13, 1, u64> trigger;
-    } nop;
-
-    union {
-        BitField<48, 2, VoteOperation> operation;
-        BitField<45, 3, u64> dest_pred;
-        BitField<39, 3, u64> value;
-        BitField<42, 1, u64> negate_value;
-    } vote;
-
-    union {
-        BitField<30, 2, ShuffleOperation> operation;
-        BitField<48, 3, u64> pred48;
-        BitField<28, 1, u64> is_index_imm;
-        BitField<29, 1, u64> is_mask_imm;
-        BitField<20, 5, u64> index_imm;
-        BitField<34, 13, u64> mask_imm;
-    } shfl;
-
-    union {
-        BitField<44, 1, u64> ftz;
-        BitField<39, 2, u64> tab5cb8_2;
-        BitField<38, 1, u64> ndv;
-        BitField<47, 1, u64> cc;
-        BitField<28, 8, u64> swizzle;
-    } fswzadd;
-
-    union {
-        BitField<8, 8, Register> gpr;
-        BitField<20, 24, s64> offset;
-    } gmem;
-
-    union {
-        BitField<20, 16, u64> imm20_16;
-        BitField<20, 19, u64> imm20_19;
-        BitField<20, 32, s64> imm20_32;
-        BitField<45, 1, u64> negate_b;
-        BitField<46, 1, u64> abs_a;
-        BitField<48, 1, u64> negate_a;
-        BitField<49, 1, u64> abs_b;
-        BitField<50, 1, u64> saturate_d;
-        BitField<56, 1, u64> negate_imm;
-
-        union {
-            BitField<39, 3, u64> pred;
-            BitField<42, 1, u64> negate_pred;
-        } fmnmx;
-
-        union {
-            BitField<39, 1, u64> invert_a;
-            BitField<40, 1, u64> invert_b;
-            BitField<41, 2, LogicOperation> operation;
-            BitField<44, 2, PredicateResultMode> pred_result_mode;
-            BitField<48, 3, Pred> pred48;
-        } lop;
-
-        union {
-            BitField<53, 2, LogicOperation> operation;
-            BitField<55, 1, u64> invert_a;
-            BitField<56, 1, u64> invert_b;
-        } lop32i;
-
-        union {
-            BitField<28, 8, u64> imm_lut28;
-            BitField<48, 8, u64> imm_lut48;
-
-            [[nodiscard]] u32 GetImmLut28() const {
-                return static_cast<u32>(imm_lut28);
-            }
-
-            [[nodiscard]] u32 GetImmLut48() const {
-                return static_cast<u32>(imm_lut48);
-            }
-        } lop3;
-
-        [[nodiscard]] u16 GetImm20_16() const {
-            return static_cast<u16>(imm20_16);
-        }
-
-        [[nodiscard]] u32 GetImm20_19() const {
-            u32 imm{static_cast<u32>(imm20_19)};
-            imm <<= 12;
-            imm |= negate_imm ? 0x80000000 : 0;
-            return imm;
-        }
-
-        [[nodiscard]] u32 GetImm20_32() const {
-            return static_cast<u32>(imm20_32);
-        }
-
-        [[nodiscard]] s32 GetSignedImm20_20() const {
-            const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
-            // Sign extend the 20-bit value.
-            const auto mask = 1U << (20 - 1);
-            return static_cast<s32>((immediate ^ mask) - mask);
-        }
-    } alu;
-
-    union {
-        BitField<38, 1, u64> idx;
-        BitField<51, 1, u64> saturate;
-        BitField<52, 2, IpaSampleMode> sample_mode;
-        BitField<54, 2, IpaInterpMode> interp_mode;
-    } ipa;
-
-    union {
-        BitField<39, 2, u64> tab5cb8_2;
-        BitField<41, 3, u64> postfactor;
-        BitField<44, 2, u64> tab5c68_0;
-        BitField<48, 1, u64> negate_b;
-    } fmul;
-
-    union {
-        BitField<55, 1, u64> saturate;
-    } fmul32;
-
-    union {
-        BitField<52, 1, u64> generates_cc;
-    } op_32;
-
-    union {
-        BitField<48, 1, u64> is_signed;
-    } shift;
-
-    union {
-        BitField<39, 1, u64> wrap;
-    } shr;
-
-    union {
-        BitField<37, 2, ShfType> type;
-        BitField<48, 2, ShfXmode> xmode;
-        BitField<50, 1, u64> wrap;
-        BitField<20, 6, u64> immediate;
-    } shf;
-
-    union {
-        BitField<39, 5, u64> shift_amount;
-        BitField<48, 1, u64> negate_b;
-        BitField<49, 1, u64> negate_a;
-    } alu_integer;
-
-    union {
-        BitField<43, 1, u64> x;
-    } iadd;
-
-    union {
-        BitField<39, 1, u64> ftz;
-        BitField<32, 1, u64> saturate;
-        BitField<49, 2, HalfMerge> merge;
-
-        BitField<44, 1, u64> abs_a;
-        BitField<47, 2, HalfType> type_a;
-
-        BitField<30, 1, u64> abs_b;
-        BitField<28, 2, HalfType> type_b;
-
-        BitField<35, 2, HalfType> type_c;
-    } alu_half;
-
-    union {
-        BitField<39, 2, HalfPrecision> precision;
-        BitField<39, 1, u64> ftz;
-        BitField<52, 1, u64> saturate;
-        BitField<49, 2, HalfMerge> merge;
-
-        BitField<43, 1, u64> negate_a;
-        BitField<44, 1, u64> abs_a;
-        BitField<47, 2, HalfType> type_a;
-    } alu_half_imm;
-
-    union {
-        BitField<29, 1, u64> first_negate;
-        BitField<20, 9, u64> first;
-
-        BitField<56, 1, u64> second_negate;
-        BitField<30, 9, u64> second;
-
-        [[nodiscard]] u32 PackImmediates() const {
-            // Immediates are half floats shifted.
-            constexpr u32 imm_shift = 6;
-            return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
-        }
-    } half_imm;
-
-    union {
-        union {
-            BitField<37, 2, HalfPrecision> precision;
-            BitField<32, 1, u64> saturate;
-
-            BitField<31, 1, u64> negate_b;
-            BitField<30, 1, u64> negate_c;
-            BitField<35, 2, HalfType> type_c;
-        } rr;
-
-        BitField<57, 2, HalfPrecision> precision;
-        BitField<52, 1, u64> saturate;
-
-        BitField<49, 2, HalfMerge> merge;
-
-        BitField<47, 2, HalfType> type_a;
-
-        BitField<56, 1, u64> negate_b;
-        BitField<28, 2, HalfType> type_b;
-
-        BitField<51, 1, u64> negate_c;
-        BitField<53, 2, HalfType> type_reg39;
-    } hfma2;
-
-    union {
-        BitField<40, 1, u64> invert;
-    } popc;
-
-    union {
-        BitField<41, 1, u64> sh;
-        BitField<40, 1, u64> invert;
-        BitField<48, 1, u64> is_signed;
-    } flo;
-
-    union {
-        BitField<39, 3, u64> pred;
-        BitField<42, 1, u64> neg_pred;
-    } sel;
-
-    union {
-        BitField<39, 3, u64> pred;
-        BitField<42, 1, u64> negate_pred;
-        BitField<43, 2, IMinMaxExchange> exchange;
-        BitField<48, 1, u64> is_signed;
-    } imnmx;
-
-    union {
-        BitField<31, 2, IAdd3Height> height_c;
-        BitField<33, 2, IAdd3Height> height_b;
-        BitField<35, 2, IAdd3Height> height_a;
-        BitField<37, 2, IAdd3Mode> mode;
-        BitField<49, 1, u64> neg_c;
-        BitField<50, 1, u64> neg_b;
-        BitField<51, 1, u64> neg_a;
-    } iadd3;
-
-    union {
-        BitField<54, 1, u64> saturate;
-        BitField<56, 1, u64> negate_a;
-    } iadd32i;
-
-    union {
-        BitField<53, 1, u64> negate_b;
-        BitField<54, 1, u64> abs_a;
-        BitField<56, 1, u64> negate_a;
-        BitField<57, 1, u64> abs_b;
-    } fadd32i;
-
-    union {
-        BitField<40, 1, u64> brev;
-        BitField<47, 1, u64> rd_cc;
-        BitField<48, 1, u64> is_signed;
-    } bfe;
-
-    union {
-        BitField<48, 3, u64> pred48;
-
-        union {
-            BitField<20, 20, u64> entry_a;
-            BitField<39, 5, u64> entry_b;
-            BitField<45, 1, u64> neg;
-            BitField<46, 1, u64> uses_cc;
-        } imm;
-
-        union {
-            BitField<20, 14, u64> cb_index;
-            BitField<34, 5, u64> cb_offset;
-            BitField<56, 1, u64> neg;
-            BitField<57, 1, u64> uses_cc;
-        } hi;
-
-        union {
-            BitField<20, 14, u64> cb_index;
-            BitField<34, 5, u64> cb_offset;
-            BitField<39, 5, u64> entry_a;
-            BitField<45, 1, u64> neg;
-            BitField<46, 1, u64> uses_cc;
-        } rz;
-
-        union {
-            BitField<39, 5, u64> entry_a;
-            BitField<45, 1, u64> neg;
-            BitField<46, 1, u64> uses_cc;
-        } r1;
-
-        union {
-            BitField<28, 8, u64> entry_a;
-            BitField<37, 1, u64> neg;
-            BitField<38, 1, u64> uses_cc;
-        } r2;
-
-    } lea;
-
-    union {
-        BitField<0, 5, FlowCondition> cond;
-    } flow;
-
-    union {
-        BitField<47, 1, u64> cc;
-        BitField<48, 1, u64> negate_b;
-        BitField<49, 1, u64> negate_c;
-        BitField<51, 2, u64> tab5980_1;
-        BitField<53, 2, u64> tab5980_0;
-    } ffma;
-
-    union {
-        BitField<48, 3, UniformType> type;
-        BitField<44, 2, u64> unknown;
-    } ld_c;
-
-    union {
-        BitField<48, 3, StoreType> type;
-    } ldst_sl;
-
-    union {
-        BitField<44, 2, u64> unknown;
-    } ld_l;
-
-    union {
-        BitField<44, 2, StoreCacheManagement> cache_management;
-    } st_l;
-
-    union {
-        BitField<48, 3, UniformType> type;
-        BitField<46, 2, u64> cache_mode;
-    } ldg;
-
-    union {
-        BitField<48, 3, UniformType> type;
-        BitField<46, 2, u64> cache_mode;
-    } stg;
-
-    union {
-        BitField<23, 3, AtomicOp> operation;
-        BitField<48, 1, u64> extended;
-        BitField<20, 3, GlobalAtomicType> type;
-    } red;
-
-    union {
-        BitField<52, 4, AtomicOp> operation;
-        BitField<49, 3, GlobalAtomicType> type;
-        BitField<28, 20, s64> offset;
-    } atom;
-
-    union {
-        BitField<52, 4, AtomicOp> operation;
-        BitField<28, 2, AtomicType> type;
-        BitField<30, 22, s64> offset;
-
-        [[nodiscard]] s32 GetImmediateOffset() const {
-            return static_cast<s32>(offset << 2);
-        }
-    } atoms;
-
-    union {
-        BitField<32, 1, PhysicalAttributeDirection> direction;
-        BitField<47, 3, AttributeSize> size;
-        BitField<20, 11, u64> address;
-    } al2p;
-
-    union {
-        BitField<53, 3, UniformType> type;
-        BitField<52, 1, u64> extended;
-    } generic;
-
-    union {
-        BitField<0, 3, u64> pred0;
-        BitField<3, 3, u64> pred3;
-        BitField<6, 1, u64> neg_b;
-        BitField<7, 1, u64> abs_a;
-        BitField<39, 3, u64> pred39;
-        BitField<42, 1, u64> neg_pred;
-        BitField<43, 1, u64> neg_a;
-        BitField<44, 1, u64> abs_b;
-        BitField<45, 2, PredOperation> op;
-        BitField<47, 1, u64> ftz;
-        BitField<48, 4, PredCondition> cond;
-    } fsetp;
-
-    union {
-        BitField<0, 3, u64> pred0;
-        BitField<3, 3, u64> pred3;
-        BitField<39, 3, u64> pred39;
-        BitField<42, 1, u64> neg_pred;
-        BitField<45, 2, PredOperation> op;
-        BitField<48, 1, u64> is_signed;
-        BitField<49, 3, PredCondition> cond;
-    } isetp;
-
-    union {
-        BitField<48, 1, u64> is_signed;
-        BitField<49, 3, PredCondition> cond;
-    } icmp;
-
-    union {
-        BitField<0, 3, u64> pred0;
-        BitField<3, 3, u64> pred3;
-        BitField<12, 3, u64> pred12;
-        BitField<15, 1, u64> neg_pred12;
-        BitField<24, 2, PredOperation> cond;
-        BitField<29, 3, u64> pred29;
-        BitField<32, 1, u64> neg_pred29;
-        BitField<39, 3, u64> pred39;
-        BitField<42, 1, u64> neg_pred39;
-        BitField<45, 2, PredOperation> op;
-    } psetp;
-
-    union {
-        BitField<43, 4, PredCondition> cond;
-        BitField<45, 2, PredOperation> op;
-        BitField<3, 3, u64> pred3;
-        BitField<0, 3, u64> pred0;
-        BitField<39, 3, u64> pred39;
-    } vsetp;
-
-    union {
-        BitField<12, 3, u64> pred12;
-        BitField<15, 1, u64> neg_pred12;
-        BitField<24, 2, PredOperation> cond;
-        BitField<29, 3, u64> pred29;
-        BitField<32, 1, u64> neg_pred29;
-        BitField<39, 3, u64> pred39;
-        BitField<42, 1, u64> neg_pred39;
-        BitField<44, 1, u64> bf;
-        BitField<45, 2, PredOperation> op;
-    } pset;
-
-    union {
-        BitField<0, 3, u64> pred0;
-        BitField<3, 3, u64> pred3;
-        BitField<8, 5, ConditionCode> cc; // flag in cc
-        BitField<39, 3, u64> pred39;
-        BitField<42, 1, u64> neg_pred39;
-        BitField<45, 4, PredOperation> op; // op with pred39
-    } csetp;
-
-    union {
-        BitField<6, 1, u64> ftz;
-        BitField<45, 2, PredOperation> op;
-        BitField<3, 3, u64> pred3;
-        BitField<0, 3, u64> pred0;
-        BitField<43, 1, u64> negate_a;
-        BitField<44, 1, u64> abs_a;
-        BitField<47, 2, HalfType> type_a;
-        union {
-            BitField<35, 4, PredCondition> cond;
-            BitField<49, 1, u64> h_and;
-            BitField<31, 1, u64> negate_b;
-            BitField<30, 1, u64> abs_b;
-            BitField<28, 2, HalfType> type_b;
-        } reg;
-        union {
-            BitField<56, 1, u64> negate_b;
-            BitField<54, 1, u64> abs_b;
-        } cbuf;
-        union {
-            BitField<49, 4, PredCondition> cond;
-            BitField<53, 1, u64> h_and;
-        } cbuf_and_imm;
-        BitField<42, 1, u64> neg_pred;
-        BitField<39, 3, u64> pred39;
-    } hsetp2;
-
-    union {
-        BitField<40, 1, R2pMode> mode;
-        BitField<41, 2, u64> byte;
-        BitField<20, 7, u64> immediate_mask;
-    } p2r_r2p;
-
-    union {
-        BitField<39, 3, u64> pred39;
-        BitField<42, 1, u64> neg_pred;
-        BitField<43, 1, u64> neg_a;
-        BitField<44, 1, u64> abs_b;
-        BitField<45, 2, PredOperation> op;
-        BitField<48, 4, PredCondition> cond;
-        BitField<52, 1, u64> bf;
-        BitField<53, 1, u64> neg_b;
-        BitField<54, 1, u64> abs_a;
-        BitField<55, 1, u64> ftz;
-    } fset;
-
-    union {
-        BitField<47, 1, u64> ftz;
-        BitField<48, 4, PredCondition> cond;
-    } fcmp;
-
-    union {
-        BitField<49, 1, u64> bf;
-        BitField<35, 3, PredCondition> cond;
-        BitField<50, 1, u64> ftz;
-        BitField<45, 2, PredOperation> op;
-        BitField<43, 1, u64> negate_a;
-        BitField<44, 1, u64> abs_a;
-        BitField<47, 2, HalfType> type_a;
-        BitField<31, 1, u64> negate_b;
-        BitField<30, 1, u64> abs_b;
-        BitField<28, 2, HalfType> type_b;
-        BitField<42, 1, u64> neg_pred;
-        BitField<39, 3, u64> pred39;
-    } hset2;
-
-    union {
-        BitField<39, 3, u64> pred39;
-        BitField<42, 1, u64> neg_pred;
-        BitField<44, 1, u64> bf;
-        BitField<45, 2, PredOperation> op;
-        BitField<48, 1, u64> is_signed;
-        BitField<49, 3, PredCondition> cond;
-    } iset;
-
-    union {
-        BitField<45, 1, u64> negate_a;
-        BitField<49, 1, u64> abs_a;
-        BitField<10, 2, Register::Size> src_size;
-        BitField<13, 1, u64> is_input_signed;
-        BitField<8, 2, Register::Size> dst_size;
-        BitField<12, 1, u64> is_output_signed;
-
-        union {
-            BitField<39, 2, u64> tab5cb8_2;
-        } i2f;
-
-        union {
-            BitField<39, 2, F2iRoundingOp> rounding;
-        } f2i;
-
-        union {
-            BitField<39, 4, u64> rounding;
-            // H0, H1 extract for F16 missing
-            BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
-            [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
-                constexpr u64 rounding_mask = 0x0B;
-                return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
-            }
-        } f2f;
-
-        union {
-            BitField<41, 2, u64> selector;
-        } int_src;
-
-        union {
-            BitField<41, 1, u64> selector;
-        } float_src;
-    } conversion;
-
-    union {
-        BitField<28, 1, u64> array;
-        BitField<29, 2, TextureType> texture_type;
-        BitField<31, 4, u64> component_mask;
-        BitField<49, 1, u64> nodep_flag;
-        BitField<50, 1, u64> dc_flag;
-        BitField<54, 1, u64> aoffi_flag;
-        BitField<55, 3, TextureProcessMode> process_mode;
-
-        [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
-            return ((1ULL << component) & component_mask) != 0;
-        }
-
-        [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
-            return process_mode;
-        }
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::DC:
-                return dc_flag != 0;
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            case TextureMiscMode::AOFFI:
-                return aoffi_flag != 0;
-            default:
-                break;
-            }
-            return false;
-        }
-    } tex;
-
-    union {
-        BitField<28, 1, u64> array;
-        BitField<29, 2, TextureType> texture_type;
-        BitField<31, 4, u64> component_mask;
-        BitField<49, 1, u64> nodep_flag;
-        BitField<50, 1, u64> dc_flag;
-        BitField<36, 1, u64> aoffi_flag;
-        BitField<37, 3, TextureProcessMode> process_mode;
-
-        [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
-            return ((1ULL << component) & component_mask) != 0;
-        }
-
-        [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
-            return process_mode;
-        }
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::DC:
-                return dc_flag != 0;
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            case TextureMiscMode::AOFFI:
-                return aoffi_flag != 0;
-            default:
-                break;
-            }
-            return false;
-        }
-    } tex_b;
-
-    union {
-        BitField<22, 6, TextureQueryType> query_type;
-        BitField<31, 4, u64> component_mask;
-        BitField<49, 1, u64> nodep_flag;
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            default:
-                break;
-            }
-            return false;
-        }
-
-        [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
-            return ((1ULL << component) & component_mask) != 0;
-        }
-    } txq;
-
-    union {
-        BitField<28, 1, u64> array;
-        BitField<29, 2, TextureType> texture_type;
-        BitField<31, 4, u64> component_mask;
-        BitField<35, 1, u64> ndv_flag;
-        BitField<49, 1, u64> nodep_flag;
-
-        [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
-            return ((1ULL << component) & component_mask) != 0;
-        }
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::NDV:
-                return (ndv_flag != 0);
-            case TextureMiscMode::NODEP:
-                return (nodep_flag != 0);
-            default:
-                break;
-            }
-            return false;
-        }
-    } tmml;
-
-    union {
-        BitField<28, 1, u64> array;
-        BitField<29, 2, TextureType> texture_type;
-        BitField<35, 1, u64> ndv_flag;
-        BitField<49, 1, u64> nodep_flag;
-        BitField<50, 1, u64> dc_flag;
-        BitField<54, 2, u64> offset_mode;
-        BitField<56, 2, u64> component;
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::NDV:
-                return ndv_flag != 0;
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            case TextureMiscMode::DC:
-                return dc_flag != 0;
-            case TextureMiscMode::AOFFI:
-                return offset_mode == 1;
-            case TextureMiscMode::PTP:
-                return offset_mode == 2;
-            default:
-                break;
-            }
-            return false;
-        }
-    } tld4;
-
-    union {
-        BitField<35, 1, u64> ndv_flag;
-        BitField<49, 1, u64> nodep_flag;
-        BitField<50, 1, u64> dc_flag;
-        BitField<33, 2, u64> offset_mode;
-        BitField<37, 2, u64> component;
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::NDV:
-                return ndv_flag != 0;
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            case TextureMiscMode::DC:
-                return dc_flag != 0;
-            case TextureMiscMode::AOFFI:
-                return offset_mode == 1;
-            case TextureMiscMode::PTP:
-                return offset_mode == 2;
-            default:
-                break;
-            }
-            return false;
-        }
-    } tld4_b;
-
-    union {
-        BitField<49, 1, u64> nodep_flag;
-        BitField<50, 1, u64> dc_flag;
-        BitField<51, 1, u64> aoffi_flag;
-        BitField<52, 2, u64> component;
-        BitField<55, 1, u64> fp16_flag;
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::DC:
-                return dc_flag != 0;
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            case TextureMiscMode::AOFFI:
-                return aoffi_flag != 0;
-            default:
-                break;
-            }
-            return false;
-        }
-    } tld4s;
-
-    union {
-        BitField<0, 8, Register> gpr0;
-        BitField<28, 8, Register> gpr28;
-        BitField<49, 1, u64> nodep_flag;
-        BitField<50, 3, u64> component_mask_selector;
-        BitField<53, 4, u64> texture_info;
-        BitField<59, 1, u64> fp32_flag;
-
-        [[nodiscard]] TextureType GetTextureType() const {
-            // The TEXS instruction has a weird encoding for the texture type.
-            if (texture_info == 0) {
-                return TextureType::Texture1D;
-            }
-            if (texture_info >= 1 && texture_info <= 9) {
-                return TextureType::Texture2D;
-            }
-            if (texture_info >= 10 && texture_info <= 11) {
-                return TextureType::Texture3D;
-            }
-            if (texture_info >= 12 && texture_info <= 13) {
-                return TextureType::TextureCube;
-            }
-
-            LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
-            UNREACHABLE();
-            return TextureType::Texture1D;
-        }
-
-        [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
-            switch (texture_info) {
-            case 0:
-            case 2:
-            case 6:
-            case 8:
-            case 9:
-            case 11:
-                return TextureProcessMode::LZ;
-            case 3:
-            case 5:
-            case 13:
-                return TextureProcessMode::LL;
-            default:
-                break;
-            }
-            return TextureProcessMode::None;
-        }
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::DC:
-                return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            default:
-                break;
-            }
-            return false;
-        }
-
-        [[nodiscard]] bool IsArrayTexture() const {
-            // TEXS only supports Texture2D arrays.
-            return texture_info >= 7 && texture_info <= 9;
-        }
-
-        [[nodiscard]] bool HasTwoDestinations() const {
-            return gpr28.Value() != Register::ZeroIndex;
-        }
-
-        [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
-            static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
-                {},
-                {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
-                {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
-                {0x7, 0xb, 0xd, 0xe, 0xf},
-            }};
-
-            std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
-            index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
-
-            u32 mask = mask_lut[index][component_mask_selector];
-            // A mask of 0 means this instruction uses an unimplemented mask.
-            ASSERT(mask != 0);
-            return ((1ull << component) & mask) != 0;
-        }
-    } texs;
-
-    union {
-        BitField<28, 1, u64> is_array;
-        BitField<29, 2, TextureType> texture_type;
-        BitField<35, 1, u64> aoffi;
-        BitField<49, 1, u64> nodep_flag;
-        BitField<50, 1, u64> ms; // Multisample?
-        BitField<54, 1, u64> cl;
-        BitField<55, 1, u64> process_mode;
-
-        [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
-            return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
-        }
-    } tld;
-
-    union {
-        BitField<49, 1, u64> nodep_flag;
-        BitField<53, 4, u64> texture_info;
-        BitField<59, 1, u64> fp32_flag;
-
-        [[nodiscard]] TextureType GetTextureType() const {
-            // The TLDS instruction has a weird encoding for the texture type.
-            if (texture_info <= 1) {
-                return TextureType::Texture1D;
-            }
-            if (texture_info == 2 || texture_info == 8 || texture_info == 12 ||
-                (texture_info >= 4 && texture_info <= 6)) {
-                return TextureType::Texture2D;
-            }
-            if (texture_info == 7) {
-                return TextureType::Texture3D;
-            }
-
-            LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
-            UNREACHABLE();
-            return TextureType::Texture1D;
-        }
-
-        [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
-            if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
-                return TextureProcessMode::LL;
-            }
-            return TextureProcessMode::LZ;
-        }
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::AOFFI:
-                return texture_info == 12 || texture_info == 4;
-            case TextureMiscMode::MZ:
-                return texture_info == 5;
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            default:
-                break;
-            }
-            return false;
-        }
-
-        [[nodiscard]] bool IsArrayTexture() const {
-            // TEXS only supports Texture2D arrays.
-            return texture_info == 8;
-        }
-    } tlds;
-
-    union {
-        BitField<28, 1, u64> is_array;
-        BitField<29, 2, TextureType> texture_type;
-        BitField<35, 1, u64> aoffi_flag;
-        BitField<49, 1, u64> nodep_flag;
-
-        [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
-            switch (mode) {
-            case TextureMiscMode::AOFFI:
-                return aoffi_flag != 0;
-            case TextureMiscMode::NODEP:
-                return nodep_flag != 0;
-            default:
-                break;
-            }
-            return false;
-        }
-
-    } txd;
-
-    union {
-        BitField<24, 2, StoreCacheManagement> cache_management;
-        BitField<33, 3, ImageType> image_type;
-        BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
-        BitField<51, 1, u64> is_immediate;
-        BitField<52, 1, SurfaceDataMode> mode;
-
-        BitField<20, 3, StoreType> store_data_layout;
-        BitField<20, 4, u64> component_mask_selector;
-
-        [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
-            ASSERT(mode == SurfaceDataMode::P);
-            constexpr u8 R = 0b0001;
-            constexpr u8 G = 0b0010;
-            constexpr u8 B = 0b0100;
-            constexpr u8 A = 0b1000;
-            constexpr std::array<u8, 16> mask = {
-                0,       (R),         (G),         (R | G),        (B),     (R | B),
-                (G | B), (R | G | B), (A),         (R | A),        (G | A), (R | G | A),
-                (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
-            return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
-        }
-
-        [[nodiscard]] StoreType GetStoreDataLayout() const {
-            ASSERT(mode == SurfaceDataMode::D_BA);
-            return store_data_layout;
-        }
-    } suldst;
-
-    union {
-        BitField<28, 1, u64> is_ba;
-        BitField<51, 3, ImageAtomicOperationType> operation_type;
-        BitField<33, 3, ImageType> image_type;
-        BitField<29, 4, ImageAtomicOperation> operation;
-        BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
-    } suatom_d;
-
-    union {
-        BitField<20, 24, u64> target;
-        BitField<5, 1, u64> constant_buffer;
-
-        [[nodiscard]] s32 GetBranchTarget() const {
-            // Sign extend the branch target offset
-            const auto mask = 1U << (24 - 1);
-            const auto target_value = static_cast<u32>(target);
-            constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
-
-            // The branch offset is relative to the next instruction and is stored in bytes, so
-            // divide it by the size of an instruction and add 1 to it.
-            return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
-        }
-    } bra;
-
-    union {
-        BitField<20, 24, u64> target;
-        BitField<5, 1, u64> constant_buffer;
-
-        [[nodiscard]] s32 GetBranchExtend() const {
-            // Sign extend the branch target offset
-            const auto mask = 1U << (24 - 1);
-            const auto target_value = static_cast<u32>(target);
-            constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
-
-            // The branch offset is relative to the next instruction and is stored in bytes, so
-            // divide it by the size of an instruction and add 1 to it.
-            return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
-        }
-    } brx;
-
-    union {
-        BitField<39, 1, u64> emit; // EmitVertex
-        BitField<40, 1, u64> cut;  // EndPrimitive
-    } out;
-
-    union {
-        BitField<31, 1, u64> skew;
-        BitField<32, 1, u64> o;
-        BitField<33, 2, IsberdMode> mode;
-        BitField<47, 2, IsberdShift> shift;
-    } isberd;
-
-    union {
-        BitField<8, 2, MembarType> type;
-        BitField<0, 2, MembarUnknown> unknown;
-    } membar;
-
-    union {
-        BitField<48, 1, u64> signed_a;
-        BitField<38, 1, u64> is_byte_chunk_a;
-        BitField<36, 2, VideoType> type_a;
-        BitField<36, 2, u64> byte_height_a;
-
-        BitField<49, 1, u64> signed_b;
-        BitField<50, 1, u64> use_register_b;
-        BitField<30, 1, u64> is_byte_chunk_b;
-        BitField<28, 2, VideoType> type_b;
-        BitField<28, 2, u64> byte_height_b;
-    } video;
-
-    union {
-        BitField<51, 2, VmadShr> shr;
-        BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
-        BitField<47, 1, u64> cc;
-    } vmad;
-
-    union {
-        BitField<54, 1, u64> is_dest_signed;
-        BitField<48, 1, u64> is_src_a_signed;
-        BitField<49, 1, u64> is_src_b_signed;
-        BitField<37, 2, u64> src_format_a;
-        BitField<29, 2, u64> src_format_b;
-        BitField<56, 1, u64> mx;
-        BitField<55, 1, u64> sat;
-        BitField<36, 2, u64> selector_a;
-        BitField<28, 2, u64> selector_b;
-        BitField<50, 1, u64> is_op_b_register;
-        BitField<51, 3, VmnmxOperation> operation;
-
-        [[nodiscard]] VmnmxType SourceFormatA() const {
-            switch (src_format_a) {
-            case 0b11:
-                return VmnmxType::Bits32;
-            case 0b10:
-                return VmnmxType::Bits16;
-            default:
-                return VmnmxType::Bits8;
-            }
-        }
-
-        [[nodiscard]] VmnmxType SourceFormatB() const {
-            switch (src_format_b) {
-            case 0b11:
-                return VmnmxType::Bits32;
-            case 0b10:
-                return VmnmxType::Bits16;
-            default:
-                return VmnmxType::Bits8;
-            }
-        }
-    } vmnmx;
-
-    union {
-        BitField<20, 16, u64> imm20_16;
-        BitField<35, 1, u64> high_b_rr; // used on RR
-        BitField<36, 1, u64> product_shift_left;
-        BitField<37, 1, u64> merge_37;
-        BitField<48, 1, u64> sign_a;
-        BitField<49, 1, u64> sign_b;
-        BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
-        BitField<50, 3, XmadMode> mode;
-        BitField<52, 1, u64> high_b;
-        BitField<53, 1, u64> high_a;
-        BitField<55, 1, u64> product_shift_left_second; // used on CR
-        BitField<56, 1, u64> merge_56;
-    } xmad;
-
-    union {
-        BitField<20, 14, u64> shifted_offset;
-        BitField<34, 5, u64> index;
-
-        [[nodiscard]] u64 GetOffset() const {
-            return shifted_offset * 4;
-        }
-    } cbuf34;
-
-    union {
-        BitField<20, 16, s64> offset;
-        BitField<36, 5, u64> index;
-
-        [[nodiscard]] s64 GetOffset() const {
-            return offset;
-        }
-    } cbuf36;
-
-    // Unsure about the size of this one.
-    // It's always used with a gpr0, so any size should be fine.
-    BitField<20, 8, SystemVariable> sys20;
-
-    BitField<47, 1, u64> generates_cc;
-    BitField<61, 1, u64> is_b_imm;
-    BitField<60, 1, u64> is_b_gpr;
-    BitField<59, 1, u64> is_c_gpr;
-    BitField<20, 24, s64> smem_imm;
-    BitField<0, 5, ConditionCode> flow_condition_code;
-
-    Attribute attribute;
-    Sampler sampler;
-    Image image;
-
-    u64 value;
-};
-static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
-static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout");
-
-class OpCode {
-public:
-    enum class Id {
-        KIL,
-        SSY,
-        SYNC,
-        BRK,
-        DEPBAR,
-        VOTE,
-        VOTE_VTG,
-        SHFL,
-        FSWZADD,
-        BFE_C,
-        BFE_R,
-        BFE_IMM,
-        BFI_RC,
-        BFI_IMM_R,
-        BRA,
-        BRX,
-        PBK,
-        LD_A,
-        LD_L,
-        LD_S,
-        LD_C,
-        LD,  // Load from generic memory
-        LDG, // Load from global memory
-        ST_A,
-        ST_L,
-        ST_S,
-        ST,    // Store in generic memory
-        STG,   // Store in global memory
-        RED,   // Reduction operation
-        ATOM,  // Atomic operation on global memory
-        ATOMS, // Atomic operation on shared memory
-        AL2P,  // Transforms attribute memory into physical memory
-        TEX,
-        TEX_B,  // Texture Load Bindless
-        TXQ,    // Texture Query
-        TXQ_B,  // Texture Query Bindless
-        TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
-        TLD,    // Texture Load
-        TLDS,   // Texture Load with scalar/non-vec4 source/destinations
-        TLD4,   // Texture Gather 4
-        TLD4_B, // Texture Gather 4 Bindless
-        TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
-        TMML_B, // Texture Mip Map Level
-        TMML,   // Texture Mip Map Level
-        TXD,    // Texture Gradient/Load with Derivates
-        TXD_B,  // Texture Gradient/Load with Derivates Bindless
-        SUST,   // Surface Store
-        SULD,   // Surface Load
-        SUATOM, // Surface Atomic Operation
-        EXIT,
-        NOP,
-        IPA,
-        OUT_R, // Emit vertex/primitive
-        ISBERD,
-        BAR,
-        MEMBAR,
-        VMAD,
-        VSETP,
-        VMNMX,
-        FFMA_IMM, // Fused Multiply and Add
-        FFMA_CR,
-        FFMA_RC,
-        FFMA_RR,
-        FADD_C,
-        FADD_R,
-        FADD_IMM,
-        FADD32I,
-        FMUL_C,
-        FMUL_R,
-        FMUL_IMM,
-        FMUL32_IMM,
-        IADD_C,
-        IADD_R,
-        IADD_IMM,
-        IADD3_C, // Add 3 Integers
-        IADD3_R,
-        IADD3_IMM,
-        IADD32I,
-        ISCADD_C, // Scale and Add
-        ISCADD_R,
-        ISCADD_IMM,
-        FLO_R,
-        FLO_C,
-        FLO_IMM,
-        LEA_R1,
-        LEA_R2,
-        LEA_RZ,
-        LEA_IMM,
-        LEA_HI,
-        HADD2_C,
-        HADD2_R,
-        HADD2_IMM,
-        HMUL2_C,
-        HMUL2_R,
-        HMUL2_IMM,
-        HFMA2_CR,
-        HFMA2_RC,
-        HFMA2_RR,
-        HFMA2_IMM_R,
-        HSETP2_C,
-        HSETP2_R,
-        HSETP2_IMM,
-        HSET2_C,
-        HSET2_R,
-        HSET2_IMM,
-        POPC_C,
-        POPC_R,
-        POPC_IMM,
-        SEL_C,
-        SEL_R,
-        SEL_IMM,
-        ICMP_RC,
-        ICMP_R,
-        ICMP_CR,
-        ICMP_IMM,
-        FCMP_RR,
-        FCMP_RC,
-        FCMP_IMMR,
-        MUFU,  // Multi-Function Operator
-        RRO_C, // Range Reduction Operator
-        RRO_R,
-        RRO_IMM,
-        F2F_C,
-        F2F_R,
-        F2F_IMM,
-        F2I_C,
-        F2I_R,
-        F2I_IMM,
-        I2F_C,
-        I2F_R,
-        I2F_IMM,
-        I2I_C,
-        I2I_R,
-        I2I_IMM,
-        LOP_C,
-        LOP_R,
-        LOP_IMM,
-        LOP32I,
-        LOP3_C,
-        LOP3_R,
-        LOP3_IMM,
-        MOV_C,
-        MOV_R,
-        MOV_IMM,
-        S2R,
-        MOV32_IMM,
-        SHL_C,
-        SHL_R,
-        SHL_IMM,
-        SHR_C,
-        SHR_R,
-        SHR_IMM,
-        SHF_RIGHT_R,
-        SHF_RIGHT_IMM,
-        SHF_LEFT_R,
-        SHF_LEFT_IMM,
-        FMNMX_C,
-        FMNMX_R,
-        FMNMX_IMM,
-        IMNMX_C,
-        IMNMX_R,
-        IMNMX_IMM,
-        FSETP_C, // Set Predicate
-        FSETP_R,
-        FSETP_IMM,
-        FSET_C,
-        FSET_R,
-        FSET_IMM,
-        ISETP_C,
-        ISETP_IMM,
-        ISETP_R,
-        ISET_R,
-        ISET_C,
-        ISET_IMM,
-        PSETP,
-        PSET,
-        CSETP,
-        R2P_IMM,
-        P2R_IMM,
-        XMAD_IMM,
-        XMAD_CR,
-        XMAD_RC,
-        XMAD_RR,
-    };
-
-    enum class Type {
-        Trivial,
-        Arithmetic,
-        ArithmeticImmediate,
-        ArithmeticInteger,
-        ArithmeticIntegerImmediate,
-        ArithmeticHalf,
-        ArithmeticHalfImmediate,
-        Bfe,
-        Bfi,
-        Shift,
-        Ffma,
-        Hfma2,
-        Flow,
-        Synch,
-        Warp,
-        Memory,
-        Texture,
-        Image,
-        FloatSet,
-        FloatSetPredicate,
-        IntegerSet,
-        IntegerSetPredicate,
-        HalfSet,
-        HalfSetPredicate,
-        PredicateSetPredicate,
-        PredicateSetRegister,
-        RegisterSetPredicate,
-        Conversion,
-        Video,
-        Xmad,
-        Unknown,
-    };
-
-    /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be
-    /// conditionally executed).
-    [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
-        // TODO(Subv): Add the rest of unpredicated instructions.
-        return opcode != Id::SSY && opcode != Id::PBK;
-    }
-
-    class Matcher {
-    public:
-        constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
-            : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
-
-        [[nodiscard]] constexpr const char* GetName() const {
-            return name;
-        }
-
-        [[nodiscard]] constexpr u16 GetMask() const {
-            return mask;
-        }
-
-        [[nodiscard]] constexpr Id GetId() const {
-            return id;
-        }
-
-        [[nodiscard]] constexpr Type GetType() const {
-            return type;
-        }
-
-        /**
-         * Tests to see if the given instruction is the instruction this matcher represents.
-         * @param instruction The instruction to test
-         * @returns true if the given instruction matches.
-         */
-        [[nodiscard]] constexpr bool Matches(u16 instruction) const {
-            return (instruction & mask) == expected;
-        }
-
-    private:
-        const char* name;
-        u16 mask;
-        u16 expected;
-        Id id;
-        Type type;
-    };
-
-    using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
-    [[nodiscard]] static DecodeResult Decode(Instruction instr) {
-        static const auto table{GetDecodeTable()};
-
-        const auto matches_instruction = [instr](const auto& matcher) {
-            return matcher.Matches(static_cast<u16>(instr.opcode));
-        };
-
-        auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
-        return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
-                                   : std::nullopt;
-    }
-
-private:
-    struct Detail {
-    private:
-        static constexpr std::size_t opcode_bitsize = 16;
-
-        /**
-         * Generates the mask and the expected value after masking from a given bitstring.
-         * A '0' in a bitstring indicates that a zero must be present at that bit position.
-         * A '1' in a bitstring indicates that a one must be present at that bit position.
-         */
-        [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
-            u16 mask = 0, expect = 0;
-            for (std::size_t i = 0; i < opcode_bitsize; i++) {
-                const std::size_t bit_position = opcode_bitsize - i - 1;
-                switch (bitstring[i]) {
-                case '0':
-                    mask |= static_cast<u16>(1U << bit_position);
-                    break;
-                case '1':
-                    expect |= static_cast<u16>(1U << bit_position);
-                    mask |= static_cast<u16>(1U << bit_position);
-                    break;
-                default:
-                    // Ignore
-                    break;
-                }
-            }
-            return std::make_pair(mask, expect);
-        }
-
-    public:
-        /// Creates a matcher that can match and parse instructions based on bitstring.
-        [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
-                                                       Type type, const char* const name) {
-            const auto [mask, expected] = GetMaskAndExpect(bitstring);
-            return Matcher(name, mask, expected, op, type);
-        }
-    };
-
-    [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
-        std::vector<Matcher> table = {
-#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
-            INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
-            INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
-            INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
-            INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
-            INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
-            INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
-            INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
-            INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
-            INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
-            INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
-            INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
-            INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
-            INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
-            INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
-            INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
-            INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
-            INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
-            INST("100-------------", Id::LD, Type::Memory, "LD"),
-            INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
-            INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
-            INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
-            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
-            INST("101-------------", Id::ST, Type::Memory, "ST"),
-            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
-            INST("1110101111111---", Id::RED, Type::Memory, "RED"),
-            INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
-            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
-            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
-            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
-            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
-            INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
-            INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
-            INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
-            INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
-            INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
-            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
-            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
-            INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
-            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
-            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
-            INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
-            INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
-            INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
-            INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
-            INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
-            INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
-            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
-            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
-            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
-            INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
-            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
-            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
-            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
-            INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
-            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
-            INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
-            INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
-            INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
-            INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
-            INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
-            INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
-            INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"),
-            INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
-            INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
-            INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
-            INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
-            INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
-            INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
-            INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
-            INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"),
-            INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"),
-            INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"),
-            INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
-            INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
-            INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
-            INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
-            INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"),
-            INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"),
-            INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"),
-            INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
-            INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
-            INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
-            INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"),
-            INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
-            INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
-            INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
-            INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
-            INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
-            INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
-            INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
-            INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
-            INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
-            INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
-            INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
-            INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"),
-            INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"),
-            INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"),
-            INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
-            INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
-            INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"),
-            INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"),
-            INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
-            INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
-            INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
-            INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
-            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
-            INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
-            INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
-            INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
-            INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
-            INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
-            INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
-            INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
-            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
-            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
-            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
-            INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
-            INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
-            INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
-            INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
-            INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
-            INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
-            INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
-            INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
-            INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
-            INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
-            INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
-            INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
-            INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
-            INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
-            INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
-            INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"),
-            INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"),
-            INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"),
-            INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
-            INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
-            INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
-            INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
-            INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
-            INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
-            INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
-            INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
-            INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
-            INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
-            INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
-            INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"),
-            INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
-            INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
-            INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
-            INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
-            INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
-            INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
-            INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
-            INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
-            INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
-            INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
-            INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
-            INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
-            INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
-            INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
-            INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
-            INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
-            INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"),
-            INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"),
-            INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"),
-            INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"),
-            INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"),
-            INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"),
-            INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
-            INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
-            INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
-            INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
-            INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
-            INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
-            INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
-            INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
-            INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
-            INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
-            INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
-            INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
-            INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
-            INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
-            INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
-        };
-#undef INST
-        std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
-            // If a matcher has more bits in its mask it is more specific, so it
-            // should come first.
-            return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
-        });
-
-        return table;
-    }
-};
-
-} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
deleted file mode 100644
index e0d7b89c59..0000000000
--- a/src/video_core/engines/shader_header.h
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <optional>
-
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-
-namespace Tegra::Shader {
-
-enum class OutputTopology : u32 {
-    PointList = 1,
-    LineStrip = 6,
-    TriangleStrip = 7,
-};
-
-enum class PixelImap : u8 {
-    Unused = 0,
-    Constant = 1,
-    Perspective = 2,
-    ScreenLinear = 3,
-};
-
-// Documentation in:
-// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
-struct Header {
-    union {
-        BitField<0, 5, u32> sph_type;
-        BitField<5, 5, u32> version;
-        BitField<10, 4, u32> shader_type;
-        BitField<14, 1, u32> mrt_enable;
-        BitField<15, 1, u32> kills_pixels;
-        BitField<16, 1, u32> does_global_store;
-        BitField<17, 4, u32> sass_version;
-        BitField<21, 5, u32> reserved;
-        BitField<26, 1, u32> does_load_or_store;
-        BitField<27, 1, u32> does_fp64;
-        BitField<28, 4, u32> stream_out_mask;
-    } common0;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_low_size;
-        BitField<24, 8, u32> per_patch_attribute_count;
-    } common1;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_high_size;
-        BitField<24, 8, u32> threads_per_input_primitive;
-    } common2;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_crs_size;
-        BitField<24, 4, OutputTopology> output_topology;
-        BitField<28, 4, u32> reserved;
-    } common3;
-
-    union {
-        BitField<0, 12, u32> max_output_vertices;
-        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
-        BitField<20, 4, u32> reserved;
-        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
-    } common4;
-
-    union {
-        struct {
-            INSERT_PADDING_BYTES_NOINIT(3);  // ImapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1);  // ImapSystemValuesB
-            INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
-            union {
-                BitField<0, 8, u16> clip_distances;
-                BitField<8, 1, u16> point_sprite_s;
-                BitField<9, 1, u16> point_sprite_t;
-                BitField<10, 1, u16> fog_coordinate;
-                BitField<12, 1, u16> tessellation_eval_point_u;
-                BitField<13, 1, u16> tessellation_eval_point_v;
-                BitField<14, 1, u16> instance_id;
-                BitField<15, 1, u16> vertex_id;
-            };
-            INSERT_PADDING_BYTES_NOINIT(5);  // ImapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(1);  // ImapReserved
-            INSERT_PADDING_BYTES_NOINIT(3);  // OmapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1);  // OmapSystemValuesB
-            INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
-            INSERT_PADDING_BYTES_NOINIT(2);  // OmapColor
-            INSERT_PADDING_BYTES_NOINIT(2);  // OmapSystemValuesC
-            INSERT_PADDING_BYTES_NOINIT(5);  // OmapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(1);  // OmapReserved
-        } vtg;
-
-        struct {
-            INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
-
-            union {
-                BitField<0, 2, PixelImap> x;
-                BitField<2, 2, PixelImap> y;
-                BitField<4, 2, PixelImap> z;
-                BitField<6, 2, PixelImap> w;
-                u8 raw;
-            } imap_generic_vector[32];
-
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapSystemValuesC
-            INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapReserved
-
-            struct {
-                u32 target;
-                union {
-                    BitField<0, 1, u32> sample_mask;
-                    BitField<1, 1, u32> depth;
-                    BitField<2, 30, u32> reserved;
-                };
-            } omap;
-
-            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
-                const u32 bit = render_target * 4 + component;
-                return omap.target & (1 << bit);
-            }
-
-            PixelImap GetPixelImap(u32 attribute) const {
-                const auto get_index = [this, attribute](u32 index) {
-                    return static_cast<PixelImap>(
-                        (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
-                };
-
-                std::optional<PixelImap> result;
-                for (u32 component = 0; component < 4; ++component) {
-                    const PixelImap index = get_index(component);
-                    if (index == PixelImap::Unused) {
-                        continue;
-                    }
-                    if (result && result != index) {
-                        LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
-                    }
-                    result = index;
-                }
-                return result.value_or(PixelImap::Unused);
-            }
-        } ps;
-
-        std::array<u32, 0xF> raw;
-    };
-
-    u64 GetLocalMemorySize() const {
-        return (common1.shader_local_memory_low_size |
-                (common2.shader_local_memory_high_size << 24));
-    }
-};
-static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
-
-} // namespace Tegra::Shader
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 7a3660496c..588ce61398 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -4,6 +4,9 @@
 
 #include <vector>
 
+#include <boost/container/small_vector.hpp>
+
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -13,9 +16,142 @@
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
+namespace {
+vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) {
+    boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings;
+    u32 binding{};
+    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        });
+        ++binding;
+    }
+    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        });
+        ++binding;
+    }
+    return device.GetLogical().CreateDescriptorSetLayout({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .bindingCount = static_cast<u32>(bindings.size()),
+        .pBindings = bindings.data(),
+    });
+}
+
+vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
+    const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout,
+    VkPipelineLayout pipeline_layout) {
+    boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries;
+    size_t offset{};
+    u32 binding{};
+    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
+        entries.push_back({
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
+    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
+        entries.push_back({
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
+    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
+        .pDescriptorUpdateEntries = entries.data(),
+        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
+        .descriptorSetLayout = descriptor_set_layout,
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
+        .pipelineLayout = pipeline_layout,
+        .set = 0,
+    });
+}
+} // Anonymous namespace
+
+ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                 const Shader::Info& info_, vk::ShaderModule spv_module_)
+    : update_descriptor_queue{&update_descriptor_queue_}, info{info_},
+      spv_module(std::move(spv_module_)),
+      descriptor_set_layout(CreateDescriptorSetLayout(device, info)),
+      descriptor_allocator(descriptor_pool, *descriptor_set_layout),
+      pipeline_layout{device.GetLogical().CreatePipelineLayout({
+          .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+          .pNext = nullptr,
+          .flags = 0,
+          .setLayoutCount = 1,
+          .pSetLayouts = descriptor_set_layout.address(),
+          .pushConstantRangeCount = 0,
+          .pPushConstantRanges = nullptr,
+      })},
+      descriptor_update_template{
+          CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)},
+      pipeline{device.GetLogical().CreateComputePipeline({
+          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+          .pNext = nullptr,
+          .flags = 0,
+          .stage{
+              .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+              .pNext = nullptr,
+              .flags = 0,
+              .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+              .module = *spv_module,
+              .pName = "main",
+              .pSpecializationInfo = nullptr,
+          },
+          .layout = *pipeline_layout,
+          .basePipelineHandle = 0,
+          .basePipelineIndex = 0,
+      })} {}
+
+void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
+    u32 enabled_uniforms{};
+    for (const auto& desc : info.constant_buffer_descriptors) {
+        enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index;
+    }
+    buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms);
 
-ComputePipeline::ComputePipeline() = default;
+    buffer_cache.UnbindComputeStorageBuffers();
+    size_t index{};
+    for (const auto& desc : info.storage_buffers_descriptors) {
+        ASSERT(desc.count == 1);
+        buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true);
+        ++index;
+    }
+    buffer_cache.UpdateComputeBuffers();
+    buffer_cache.BindHostComputeBuffers();
+}
 
-ComputePipeline::~ComputePipeline() = default;
+VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
+    const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+    update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
+    return descriptor_set;
+}
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 433d8bb3de..dc045d5245 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -5,19 +5,52 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_pipeline.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 class Device;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
 
-class ComputePipeline {
+class ComputePipeline : public Pipeline {
 public:
-    explicit ComputePipeline();
-    ~ComputePipeline();
+    explicit ComputePipeline() = default;
+    explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
+                             VKUpdateDescriptorQueue& update_descriptor_queue,
+                             const Shader::Info& info, vk::ShaderModule spv_module);
+
+    ComputePipeline& operator=(ComputePipeline&&) noexcept = default;
+    ComputePipeline(ComputePipeline&&) noexcept = default;
+
+    ComputePipeline& operator=(const ComputePipeline&) = delete;
+    ComputePipeline(const ComputePipeline&) = delete;
+
+    void ConfigureBufferCache(BufferCache& buffer_cache);
+
+    [[nodiscard]] VkDescriptorSet UpdateDescriptorSet();
+
+    [[nodiscard]] VkPipeline Handle() const noexcept {
+        return *pipeline;
+    }
+
+    [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept {
+        return *pipeline_layout;
+    }
+
+private:
+    VKUpdateDescriptorQueue* update_descriptor_queue;
+    Shader::Info info;
+
+    vk::ShaderModule spv_module;
+    vk::DescriptorSetLayout descriptor_set_layout;
+    DescriptorAllocator descriptor_allocator;
+    vk::PipelineLayout pipeline_layout;
+    vk::DescriptorUpdateTemplateKHR descriptor_update_template;
+    vk::Pipeline pipeline;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ef9fb59103..3bea1ff449 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20;
 DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
                                          VkDescriptorSetLayout layout_)
     : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
-      descriptor_pool{descriptor_pool_}, layout{layout_} {}
-
-DescriptorAllocator::~DescriptorAllocator() = default;
+      descriptor_pool{&descriptor_pool_}, layout{layout_} {}
 
 VkDescriptorSet DescriptorAllocator::Commit() {
     const std::size_t index = CommitResource();
@@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() {
 }
 
 void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
-    descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
+    descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin));
 }
 
 VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index f892be7bed..2501f9967e 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -17,8 +17,12 @@ class VKScheduler;
 
 class DescriptorAllocator final : public ResourcePool {
 public:
+    explicit DescriptorAllocator() = default;
     explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
-    ~DescriptorAllocator() override;
+    ~DescriptorAllocator() override = default;
+
+    DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
+    DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
 
     DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
     DescriptorAllocator(const DescriptorAllocator&) = delete;
@@ -29,8 +33,8 @@ protected:
     void Allocate(std::size_t begin, std::size_t end) override;
 
 private:
-    VKDescriptorPool& descriptor_pool;
-    const VkDescriptorSetLayout layout;
+    VKDescriptorPool* descriptor_pool{};
+    VkDescriptorSetLayout layout{};
 
     std::vector<vk::DescriptorSets> descriptors_allocations;
 };
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h
new file mode 100644
index 0000000000..b062884035
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline.h
@@ -0,0 +1,36 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Pipeline {
+public:
+    /// Add a reference count to the pipeline
+    void AddRef() noexcept {
+        ++ref_count;
+    }
+
+    [[nodiscard]] bool RemoveRef() noexcept {
+        --ref_count;
+        return ref_count == 0;
+    }
+
+    [[nodiscard]] u64 UsageTick() const noexcept {
+        return usage_tick;
+    }
+
+protected:
+    u64 usage_tick{};
+
+private:
+    size_t ref_count{};
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 7d0ba1180d..4bf3e4819c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -12,6 +12,8 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/recompiler.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
@@ -22,43 +24,105 @@
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
 #include "video_core/shader_notify.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+#pragma optimize("", off)
+
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
 using Tegra::Engines::ShaderType;
 
 namespace {
-size_t StageFromProgram(size_t program) {
-    return program == 0 ? 0 : program - 1;
-}
+class Environment final : public Shader::Environment {
+public:
+    explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
+        : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {}
+
+    ~Environment() override = default;
+
+    [[nodiscard]] std::optional<u128> Analyze(u32 start_address) {
+        const std::optional<u64> size{TryFindSize(start_address)};
+        if (!size) {
+            return std::nullopt;
+        }
+        cached_lowest = start_address;
+        cached_highest = start_address + static_cast<u32>(*size);
+        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size());
+    }
 
-ShaderType StageFromProgram(Maxwell::ShaderProgram program) {
-    return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program)));
-}
+    [[nodiscard]] size_t ShaderSize() const noexcept {
+        return read_highest - read_lowest + INST_SIZE;
+    }
 
-ShaderType GetShaderType(Maxwell::ShaderProgram program) {
-    switch (program) {
-    case Maxwell::ShaderProgram::VertexB:
-        return ShaderType::Vertex;
-    case Maxwell::ShaderProgram::TesselationControl:
-        return ShaderType::TesselationControl;
-    case Maxwell::ShaderProgram::TesselationEval:
-        return ShaderType::TesselationEval;
-    case Maxwell::ShaderProgram::Geometry:
-        return ShaderType::Geometry;
-    case Maxwell::ShaderProgram::Fragment:
-        return ShaderType::Fragment;
-    default:
-        UNIMPLEMENTED_MSG("program={}", program);
-        return ShaderType::Vertex;
+    [[nodiscard]] u128 ComputeHash() const {
+        const size_t size{ShaderSize()};
+        auto data = std::make_unique<u64[]>(size);
+        gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size);
+        return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size);
     }
-}
+
+    u64 ReadInstruction(u32 address) override {
+        read_lowest = std::min(read_lowest, address);
+        read_highest = std::max(read_highest, address);
+
+        if (address >= cached_lowest && address < cached_highest) {
+            return code[address / INST_SIZE];
+        }
+        return gpu_memory.Read<u64>(program_base + address);
+    }
+
+    std::array<u32, 3> WorkgroupSize() override {
+        const auto& qmd{kepler_compute.launch_description};
+        return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
+    }
+
+private:
+    static constexpr size_t INST_SIZE = sizeof(u64);
+    static constexpr size_t BLOCK_SIZE = 0x1000;
+    static constexpr size_t MAXIMUM_SIZE = 0x100000;
+
+    static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
+    static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
+
+    std::optional<u64> TryFindSize(u32 start_address) {
+        GPUVAddr guest_addr = program_base + start_address;
+        size_t offset = 0;
+        size_t size = BLOCK_SIZE;
+        while (size <= MAXIMUM_SIZE) {
+            code.resize(size / INST_SIZE);
+            u64* const data = code.data() + offset / INST_SIZE;
+            gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE);
+            for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
+                const u64 inst = data[i / INST_SIZE];
+                if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
+                    return offset + i;
+                }
+            }
+            guest_addr += BLOCK_SIZE;
+            size += BLOCK_SIZE;
+            offset += BLOCK_SIZE;
+        }
+        return std::nullopt;
+    }
+
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+    GPUVAddr program_base;
+
+    u32 read_lowest = 0;
+    u32 read_highest = 0;
+
+    std::vector<u64> code;
+    u32 cached_lowest = std::numeric_limits<u32>::max();
+    u32 cached_highest = 0;
+};
 } // Anonymous namespace
 
 size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
     return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 
-Shader::Shader() = default;
-
-Shader::~Shader() = default;
-
 PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
                              Tegra::Engines::Maxwell3D& maxwell3d_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              Tegra::MemoryManager& gpu_memory_, const Device& device_,
                              VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_)
-    : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
+    : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
       scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
                                                                     update_descriptor_queue_} {}
 
 PipelineCache::~PipelineCache() = default;
 
-ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
+ComputePipeline* PipelineCache::CurrentComputePipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
-    auto& entry = pair->second;
-    if (!is_cache_miss) {
-        return *entry;
+    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+    const auto& qmd{kepler_compute.launch_description};
+    const GPUVAddr shader_addr{program_base + qmd.program_start};
+    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+    if (!cpu_shader_addr) {
+        return nullptr;
+    }
+    ShaderInfo* const shader{TryGet(*cpu_shader_addr)};
+    if (!shader) {
+        return CreateComputePipelineWithoutShader(*cpu_shader_addr);
+    }
+    const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)};
+    const auto [pair, is_new]{compute_cache.try_emplace(key)};
+    auto& pipeline{pair->second};
+    if (!is_new) {
+        return &pipeline;
+    }
+    pipeline = CreateComputePipeline(shader);
+    shader->compute_users.push_back(key);
+    return &pipeline;
+}
+
+ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
+    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+    const auto& qmd{kepler_compute.launch_description};
+    Environment env{kepler_compute, gpu_memory, program_base};
+    if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) {
+        // TODO: Load from cache
     }
-    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-    throw "Bad";
+    const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)};
+    shader_info->unique_hash = env.ComputeHash();
+    shader_info->size_bytes = env.ShaderSize();
+    return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
+                           BuildShader(device, code)};
 }
 
-void PipelineCache::OnShaderRemoval(Shader*) {}
+ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) {
+    ShaderInfo shader;
+    ComputePipeline pipeline{CreateComputePipeline(&shader)};
+    const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
+    shader.compute_users.push_back(key);
+    pipeline.AddRef();
+
+    const size_t size_bytes{shader.size_bytes};
+    Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);
+    return &compute_cache.emplace(key, std::move(pipeline)).first->second;
+}
+
+ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const {
+    const auto& qmd{kepler_compute.launch_description};
+    return {
+        .unique_hash = unique_hash,
+        .shared_memory_size = qmd.shared_alloc,
+        .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+    };
+}
+
+void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
+    for (const ComputePipelineCacheKey& key : shader->compute_users) {
+        const auto it = compute_cache.find(key);
+        ASSERT(it != compute_cache.end());
+
+        Pipeline& pipeline = it->second;
+        if (pipeline.RemoveRef()) {
+            // Wait for the pipeline to be free of GPU usage before destroying it
+            scheduler.Wait(pipeline.UsageTick());
+            compute_cache.erase(it);
+        }
+    }
+}
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e3e63340dd..eb35abc27f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ComputePipelineCacheKey {
-    GPUVAddr shader;
+    u128 unique_hash;
     u32 shared_memory_size;
     std::array<u32, 3> workgroup_size;
 
@@ -67,13 +67,13 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
 
 namespace Vulkan {
 
-class Shader {
-public:
-    explicit Shader();
-    ~Shader();
+struct ShaderInfo {
+    u128 unique_hash{};
+    size_t size_bytes{};
+    std::vector<ComputePipelineCacheKey> compute_users;
 };
 
-class PipelineCache final : public VideoCommon::ShaderCache<Shader> {
+class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
 public:
     explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
                            Tegra::Engines::Maxwell3D& maxwell3d,
@@ -83,12 +83,18 @@ public:
                            VKUpdateDescriptorQueue& update_descriptor_queue);
     ~PipelineCache() override;
 
-    ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+    [[nodiscard]] ComputePipeline* CurrentComputePipeline();
 
 protected:
-    void OnShaderRemoval(Shader* shader) final;
+    void OnShaderRemoval(ShaderInfo* shader) override;
 
 private:
+    ComputePipeline CreateComputePipeline(ShaderInfo* shader);
+
+    ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
+
+    ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const;
+
     Tegra::GPU& gpu;
     Tegra::Engines::Maxwell3D& maxwell3d;
     Tegra::Engines::KeplerCompute& kepler_compute;
@@ -99,13 +105,7 @@ private:
     VKDescriptorPool& descriptor_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
 
-    std::unique_ptr<Shader> null_shader;
-    std::unique_ptr<Shader> null_kernel;
-
-    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
-
-    std::mutex pipeline_cache;
-    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
+    std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f152297d94..b757454c49 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -36,6 +36,8 @@
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+#pragma optimize("", off)
+
 namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() {
 }
 
 void RasterizerVulkan::DispatchCompute() {
-    UNREACHABLE_MSG("Not implemented");
+    ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
+    if (!pipeline) {
+        return;
+    }
+    std::scoped_lock lock{buffer_cache.mutex};
+    update_descriptor_queue.Acquire();
+    pipeline->ConfigureBufferCache(buffer_cache);
+    const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()};
+
+    const auto& qmd{kepler_compute.launch_description};
+    const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
+    const VkPipeline pipeline_handle{pipeline->Handle()};
+    const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()};
+    scheduler.Record(
+        [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
+            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0,
+                                      descriptor_set, nullptr);
+            cmdbuf.Dispatch(dim[0], dim[1], dim[2]);
+        });
 }
 
 void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 31017dc2b0..3fd03b9155 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,7 +21,6 @@
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -150,8 +149,6 @@ private:
     BlitImageHelper blit_image;
     ASTCDecoderPass astc_decoder_pass;
 
-    GraphicsPipelineCacheKey graphics_key;
-
     TextureCacheRuntime texture_cache_runtime;
     TextureCache texture_cache;
     BufferCacheRuntime buffer_cache_runtime;
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index a8bf7bda8c..2dd5149681 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
 namespace Vulkan {
 
 ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
-    : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
-
-ResourcePool::~ResourcePool() = default;
+    : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
 
 size_t ResourcePool::CommitResource() {
     // Refresh semaphore to query updated results
-    master_semaphore.Refresh();
-    const u64 gpu_tick = master_semaphore.KnownGpuTick();
+    master_semaphore->Refresh();
+    const u64 gpu_tick = master_semaphore->KnownGpuTick();
     const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
         for (size_t iterator = begin; iterator < end; ++iterator) {
             if (gpu_tick >= ticks[iterator]) {
-                ticks[iterator] = master_semaphore.CurrentTick();
+                ticks[iterator] = master_semaphore->CurrentTick();
                 return iterator;
             }
         }
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
             // Both searches failed, the pool is full; handle it.
             const size_t free_resource = ManageOverflow();
 
-            ticks[free_resource] = master_semaphore.CurrentTick();
+            ticks[free_resource] = master_semaphore->CurrentTick();
             found = free_resource;
         }
     }
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index 9d0bb3b4d0..f0b80ad59e 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
  */
 class ResourcePool {
 public:
+    explicit ResourcePool() = default;
     explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
-    virtual ~ResourcePool();
+
+    virtual ~ResourcePool() = default;
+
+    ResourcePool& operator=(ResourcePool&&) noexcept = default;
+    ResourcePool(ResourcePool&&) noexcept = default;
+
+    ResourcePool& operator=(const ResourcePool&) = default;
+    ResourcePool(const ResourcePool&) = default;
 
 protected:
     size_t CommitResource();
@@ -34,7 +42,7 @@ private:
     /// Allocates a new page of resources.
     void Grow();
 
-    MasterSemaphore& master_semaphore;
+    MasterSemaphore* master_semaphore{};
     size_t grow_step = 0;     ///< Number of new resources created after an overflow
     size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
     std::vector<u64> ticks;   ///< Ticks for each resource
-- 
cgit v1.2.3-70-g09d2


From 704c6f353f68745168902c6c66c04bb730bd30e6 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 21 Feb 2021 17:50:14 -0300
Subject: shader: Rename, implement FADD.SAT and P2R (imm)

---
 src/shader_recompiler/CMakeLists.txt               |  3 +-
 src/shader_recompiler/backend/spirv/emit_spirv.h   | 40 ++++++------
 .../backend/spirv/emit_spirv_floating_point.cpp    | 58 +++++++----------
 .../backend/spirv/emit_spirv_integer.cpp           | 75 +++++++++++++++-------
 .../backend/spirv/emit_spirv_select.cpp            |  4 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 16 ++---
 src/shader_recompiler/frontend/ir/pred.h           |  4 +-
 src/shader_recompiler/frontend/maxwell/program.cpp |  2 +-
 .../maxwell/translate/impl/floating_point_add.cpp  | 20 +++---
 .../impl/floating_point_conversion_integer.cpp     |  2 +-
 .../impl/floating_point_fused_multiply_add.cpp     |  4 +-
 .../translate/impl/floating_point_multiply.cpp     |  2 +-
 .../frontend/maxwell/translate/impl/impl.cpp       | 17 ++++-
 .../frontend/maxwell/translate/impl/impl.h         |  7 +-
 .../maxwell/translate/impl/integer_add.cpp         |  4 +-
 .../translate/impl/move_predicate_to_register.cpp  | 66 +++++++++++++++++++
 .../maxwell/translate/impl/not_implemented.cpp     | 12 ----
 .../renderer_vulkan/vk_pipeline_cache.cpp          |  4 +-
 18 files changed, 213 insertions(+), 127 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index fbd4ec6dca..8025272555 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -74,9 +74,10 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
     frontend/maxwell/translate/impl/load_store_attribute.cpp
     frontend/maxwell/translate/impl/load_store_memory.cpp
-    frontend/maxwell/translate/impl/not_implemented.cpp
+    frontend/maxwell/translate/impl/move_predicate_to_register.cpp
     frontend/maxwell/translate/impl/move_register.cpp
     frontend/maxwell/translate/impl/move_special_register.cpp
+    frontend/maxwell/translate/impl/not_implemented.cpp
     frontend/maxwell/translate/translate.cpp
     frontend/maxwell/translate/translate.h
     ir_opt/collect_shader_info_pass.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index de624a1510..922e294a78 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -110,7 +110,7 @@ void EmitCompositeExtractF64x3(EmitContext& ctx);
 void EmitCompositeExtractF64x4(EmitContext& ctx);
 void EmitSelect8(EmitContext& ctx);
 void EmitSelect16(EmitContext& ctx);
-void EmitSelect32(EmitContext& ctx);
+Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 void EmitSelect64(EmitContext& ctx);
 void EmitBitCastU16F16(EmitContext& ctx);
 Id EmitBitCastU32F32(EmitContext& ctx, Id value);
@@ -130,9 +130,9 @@ void EmitGetZeroFromOp(EmitContext& ctx);
 void EmitGetSignFromOp(EmitContext& ctx);
 void EmitGetCarryFromOp(EmitContext& ctx);
 void EmitGetOverflowFromOp(EmitContext& ctx);
-void EmitFPAbs16(EmitContext& ctx);
-void EmitFPAbs32(EmitContext& ctx);
-void EmitFPAbs64(EmitContext& ctx);
+Id EmitFPAbs16(EmitContext& ctx, Id value);
+Id EmitFPAbs32(EmitContext& ctx, Id value);
+Id EmitFPAbs64(EmitContext& ctx, Id value);
 Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
@@ -146,9 +146,9 @@ void EmitFPMin64(EmitContext& ctx);
 Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-void EmitFPNeg16(EmitContext& ctx);
-void EmitFPNeg32(EmitContext& ctx);
-void EmitFPNeg64(EmitContext& ctx);
+Id EmitFPNeg16(EmitContext& ctx, Id value);
+Id EmitFPNeg32(EmitContext& ctx, Id value);
+Id EmitFPNeg64(EmitContext& ctx, Id value);
 void EmitFPRecip32(EmitContext& ctx);
 void EmitFPRecip64(EmitContext& ctx);
 void EmitFPRecipSqrt32(EmitContext& ctx);
@@ -161,9 +161,9 @@ void EmitFPExp2NotReduced(EmitContext& ctx);
 void EmitFPCos(EmitContext& ctx);
 void EmitFPCosNotReduced(EmitContext& ctx);
 void EmitFPLog2(EmitContext& ctx);
-void EmitFPSaturate16(EmitContext& ctx);
-void EmitFPSaturate32(EmitContext& ctx);
-void EmitFPSaturate64(EmitContext& ctx);
+Id EmitFPSaturate16(EmitContext& ctx, Id value);
+Id EmitFPSaturate32(EmitContext& ctx, Id value);
+Id EmitFPSaturate64(EmitContext& ctx, Id value);
 Id EmitFPRoundEven16(EmitContext& ctx, Id value);
 Id EmitFPRoundEven32(EmitContext& ctx, Id value);
 Id EmitFPRoundEven64(EmitContext& ctx, Id value);
@@ -186,21 +186,21 @@ void EmitIAbs32(EmitContext& ctx);
 Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
 void EmitShiftRightLogical32(EmitContext& ctx);
 void EmitShiftRightArithmetic32(EmitContext& ctx);
-void EmitBitwiseAnd32(EmitContext& ctx);
-void EmitBitwiseOr32(EmitContext& ctx);
-void EmitBitwiseXor32(EmitContext& ctx);
+Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b);
+Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b);
+Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b);
 void EmitBitFieldInsert(EmitContext& ctx);
 void EmitBitFieldSExtract(EmitContext& ctx);
 Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count);
 Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
-void EmitULessThan(EmitContext& ctx);
-void EmitIEqual(EmitContext& ctx);
-void EmitSLessThanEqual(EmitContext& ctx);
-void EmitULessThanEqual(EmitContext& ctx);
+Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-void EmitUGreaterThan(EmitContext& ctx);
-void EmitINotEqual(EmitContext& ctx);
-void EmitSGreaterThanEqual(EmitContext& ctx);
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
 void EmitLogicalOr(EmitContext& ctx);
 void EmitLogicalAnd(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index c9687de377..47f87054b4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -12,37 +12,21 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
     if (flags.no_contraction) {
         ctx.Decorate(op, spv::Decoration::NoContraction);
     }
-    switch (flags.rounding) {
-    case IR::FpRounding::DontCare:
-        break;
-    case IR::FpRounding::RN:
-        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE);
-        break;
-    case IR::FpRounding::RM:
-        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN);
-        break;
-    case IR::FpRounding::RP:
-        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTP);
-        break;
-    case IR::FpRounding::RZ:
-        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ);
-        break;
-    }
     return op;
 }
 
 } // Anonymous namespace
 
-void EmitFPAbs16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPAbs16(EmitContext& ctx, Id value) {
+    return ctx.OpFAbs(ctx.F16[1], value);
 }
 
-void EmitFPAbs32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPAbs32(EmitContext& ctx, Id value) {
+    return ctx.OpFAbs(ctx.F32[1], value);
 }
 
-void EmitFPAbs64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPAbs64(EmitContext& ctx, Id value) {
+    return ctx.OpFAbs(ctx.F64[1], value);
 }
 
 Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
@@ -97,16 +81,16 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
 }
 
-void EmitFPNeg16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPNeg16(EmitContext& ctx, Id value) {
+    return ctx.OpFNegate(ctx.F16[1], value);
 }
 
-void EmitFPNeg32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPNeg32(EmitContext& ctx, Id value) {
+    return ctx.OpFNegate(ctx.F32[1], value);
 }
 
-void EmitFPNeg64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPNeg64(EmitContext& ctx, Id value) {
+    return ctx.OpFNegate(ctx.F64[1], value);
 }
 
 void EmitFPRecip32(EmitContext&) {
@@ -157,16 +141,22 @@ void EmitFPLog2(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitFPSaturate16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPSaturate16(EmitContext& ctx, Id value) {
+    const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
+    const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
+    return ctx.OpFClamp(ctx.F32[1], value, zero, one);
 }
 
-void EmitFPSaturate32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPSaturate32(EmitContext& ctx, Id value) {
+    const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})};
+    const Id one{ctx.Constant(ctx.F32[1], f32{1.0})};
+    return ctx.OpFClamp(ctx.F32[1], value, zero, one);
 }
 
-void EmitFPSaturate64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPSaturate64(EmitContext& ctx, Id value) {
+    const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
+    const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
+    return ctx.OpFClamp(ctx.F64[1], value, zero, one);
 }
 
 Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 22117a4ee9..4c0b5990dc 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -7,10 +7,39 @@
 namespace Shader::Backend::SPIRV {
 
 Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
-    if (inst->HasAssociatedPseudoOperation()) {
-        throw NotImplementedException("Pseudo-operations on IAdd32");
+    Id result{};
+    if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
+        const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])};
+        const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)};
+        result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U);
+
+        const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)};
+        carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value));
+        carry->Invalidate();
+    } else {
+        result = ctx.OpIAdd(ctx.U32[1], a, b);
     }
-    return ctx.OpIAdd(ctx.U32[1], a, b);
+    if (IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}) {
+        zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value));
+        zero->Invalidate();
+    }
+    if (IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}) {
+        sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value));
+        sign->Invalidate();
+    }
+    if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
+        // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
+        constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
+        const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)};
+        const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Constant(ctx.U32[1], s32_max), a)};
+
+        const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)};
+        const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)};
+        const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)};
+        overflow->SetDefinition(carry_flag);
+        overflow->Invalidate();
+    }
+    return result;
 }
 
 void EmitIAdd64(EmitContext&) {
@@ -49,16 +78,16 @@ void EmitShiftRightArithmetic32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitBitwiseAnd32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b) {
+    return ctx.OpBitwiseAnd(ctx.U32[1], a, b);
 }
 
-void EmitBitwiseOr32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b) {
+    return ctx.OpBitwiseOr(ctx.U32[1], a, b);
 }
 
-void EmitBitwiseXor32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) {
+    return ctx.OpBitwiseXor(ctx.U32[1], a, b);
 }
 
 void EmitBitFieldInsert(EmitContext&) {
@@ -77,36 +106,36 @@ Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSLessThan(ctx.U1, lhs, rhs);
 }
 
-void EmitULessThan(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpULessThan(ctx.U1, lhs, rhs);
 }
 
-void EmitIEqual(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpIEqual(ctx.U1, lhs, rhs);
 }
 
-void EmitSLessThanEqual(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs);
 }
 
-void EmitULessThanEqual(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpULessThanEqual(ctx.U1, lhs, rhs);
 }
 
 Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
     return ctx.OpSGreaterThan(ctx.U1, lhs, rhs);
 }
 
-void EmitUGreaterThan(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpUGreaterThan(ctx.U1, lhs, rhs);
 }
 
-void EmitINotEqual(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpINotEqual(ctx.U1, lhs, rhs);
 }
 
-void EmitSGreaterThanEqual(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs);
 }
 
 Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
index 8d50627247..eb1926a4db 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -14,8 +14,8 @@ void EmitSelect16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSelect32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+    return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);
 }
 
 void EmitSelect64(EmitContext&) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 8f120a2f67..34c2f67fb3 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -468,11 +468,11 @@ F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F
 
 F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
     switch (value.Type()) {
-    case Type::U16:
+    case Type::F16:
         return Inst<F16>(Opcode::FPAbs16, value);
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPAbs32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPAbs64, value);
     default:
         ThrowInvalidType(value.Type());
@@ -481,11 +481,11 @@ F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
 
 F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
     switch (value.Type()) {
-    case Type::U16:
+    case Type::F16:
         return Inst<F16>(Opcode::FPNeg16, value);
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPNeg32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPNeg64, value);
     default:
         ThrowInvalidType(value.Type());
@@ -495,10 +495,10 @@ F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
 F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
     F16F32F64 result{value};
     if (abs) {
-        result = FPAbs(value);
+        result = FPAbs(result);
     }
     if (neg) {
-        result = FPNeg(value);
+        result = FPNeg(result);
     }
     return result;
 }
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
index c6f2f82bfb..4e7f32423a 100644
--- a/src/shader_recompiler/frontend/ir/pred.h
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -19,8 +19,8 @@ enum class Pred : u64 {
     PT,
 };
 
-constexpr size_t NUM_USER_PREDS = 6;
-constexpr size_t NUM_PREDS = 7;
+constexpr size_t NUM_USER_PREDS = 7;
+constexpr size_t NUM_PREDS = 8;
 
 [[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
     return static_cast<size_t>(pred);
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 16cdc12e2a..ed5dbf41fd 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -56,12 +56,12 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
             .post_order_blocks{},
         });
     }
+    fmt::print(stdout, "{}\n", IR::DumpProgram(program));
     Optimization::LowerFp16ToFp32(program);
     for (IR::Function& function : functions) {
         function.post_order_blocks = PostOrder(function.blocks);
         Optimization::SsaRewritePass(function.post_order_blocks);
     }
-    fmt::print(stdout, "{}\n", IR::DumpProgram(program));
     Optimization::GlobalMemoryToStorageBufferPass(program);
     for (IR::Function& function : functions) {
         Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
index cb3a326cfa..219ffcc6af 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -9,7 +9,6 @@
 
 namespace Shader::Maxwell {
 namespace {
-
 void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
           const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
     union {
@@ -18,9 +17,6 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin
         BitField<8, 8, IR::Reg> src_a;
     } const fadd{insn};
 
-    if (sat) {
-        throw NotImplementedException("FADD SAT");
-    }
     if (cc) {
         throw NotImplementedException("FADD CC");
     }
@@ -31,7 +27,11 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin
         .rounding{CastFpRounding(fp_rounding)},
         .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
     };
-    v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
+    IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
+    if (sat) {
+        value = v.ir.FPSaturate(value);
+    }
+    v.F(fadd.dest_reg, value);
 }
 
 void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
@@ -53,15 +53,15 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
 } // Anonymous namespace
 
 void TranslatorVisitor::FADD_reg(u64 insn) {
-    FADD(*this, insn, GetReg20F(insn));
+    FADD(*this, insn, GetRegFloat20(insn));
 }
 
-void TranslatorVisitor::FADD_cbuf(u64) {
-    throw NotImplementedException("FADD (cbuf)");
+void TranslatorVisitor::FADD_cbuf(u64 insn) {
+    FADD(*this, insn, GetFloatCbuf(insn));
 }
 
-void TranslatorVisitor::FADD_imm(u64) {
-    throw NotImplementedException("FADD (imm)");
+void TranslatorVisitor::FADD_imm(u64 insn) {
+    FADD(*this, insn, GetFloatImm20(insn));
 }
 
 void TranslatorVisitor::FADD32I(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
index 4d82a0009d..81175627fd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -158,7 +158,7 @@ void TranslatorVisitor::F2I_cbuf(u64 insn) {
         case SrcFormat::F16:
             return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
         case SrcFormat::F32:
-            return GetCbufF(insn);
+            return GetFloatCbuf(insn);
         case SrcFormat::F64: {
             return UnpackCbuf(*this, insn);
         }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
index 1464f2807a..758700d3c4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s
 } // Anonymous namespace
 
 void TranslatorVisitor::FFMA_reg(u64 insn) {
-    FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn));
+    FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn));
 }
 
 void TranslatorVisitor::FFMA_rc(u64) {
@@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) {
 }
 
 void TranslatorVisitor::FFMA_cr(u64 insn) {
-    FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn));
+    FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn));
 }
 
 void TranslatorVisitor::FFMA_imm(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
index 1b1d38be7a..5c38d3fc11 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -91,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
 } // Anonymous namespace
 
 void TranslatorVisitor::FMUL_reg(u64 insn) {
-    return FMUL(*this, insn, GetReg20F(insn));
+    return FMUL(*this, insn, GetRegFloat20(insn));
 }
 
 void TranslatorVisitor::FMUL_cbuf(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 079e3497f2..be17bb0d9f 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
     return X(reg.index);
 }
 
-IR::F32 TranslatorVisitor::GetReg20F(u64 insn) {
+IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) {
     return ir.BitCast<IR::F32>(GetReg20(insn));
 }
 
-IR::F32 TranslatorVisitor::GetReg39F(u64 insn) {
+IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) {
     return ir.BitCast<IR::F32>(GetReg39(insn));
 }
 
@@ -73,7 +73,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
     return ir.GetCbuf(binding, byte_offset);
 }
 
-IR::F32 TranslatorVisitor::GetCbufF(u64 insn) {
+IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
     return ir.BitCast<IR::F32>(GetCbuf(insn));
 }
 
@@ -88,6 +88,17 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
     return ir.Imm32(value);
 }
 
+IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 19, u64> value;
+        BitField<56, 1, u64> is_negative;
+    } const imm{insn};
+    const f32 positive_value{Common::BitCast<f32>(static_cast<u32>(imm.value) << 12)};
+    const f32 value{imm.is_negative != 0 ? -positive_value : positive_value};
+    return ir.Imm32(value);
+}
+
 IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
     union {
         u64 raw;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 27aba2cf87..4d4cf2ebfb 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -304,13 +304,14 @@ public:
     [[nodiscard]] IR::U32 GetReg8(u64 insn);
     [[nodiscard]] IR::U32 GetReg20(u64 insn);
     [[nodiscard]] IR::U32 GetReg39(u64 insn);
-    [[nodiscard]] IR::F32 GetReg20F(u64 insn);
-    [[nodiscard]] IR::F32 GetReg39F(u64 insn);
+    [[nodiscard]] IR::F32 GetRegFloat20(u64 insn);
+    [[nodiscard]] IR::F32 GetRegFloat39(u64 insn);
 
     [[nodiscard]] IR::U32 GetCbuf(u64 insn);
-    [[nodiscard]] IR::F32 GetCbufF(u64 insn);
+    [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm20(u64 insn);
+    [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm32(u64 insn);
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
index 623e78ff85..1493e18151 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -84,8 +84,8 @@ void TranslatorVisitor::IADD_cbuf(u64 insn) {
     IADD(*this, insn, GetCbuf(insn));
 }
 
-void TranslatorVisitor::IADD_imm(u64) {
-    throw NotImplementedException("IADD (imm)");
+void TranslatorVisitor::IADD_imm(u64 insn) {
+    IADD(*this, insn, GetImm20(insn));
 }
 
 void TranslatorVisitor::IADD32I(u64 insn) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 0000000000..4324fd443c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+    PR,
+    CC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::P2R_reg(u64) {
+    throw NotImplementedException("P2R (reg)");
+}
+
+void TranslatorVisitor::P2R_cbuf(u64) {
+    throw NotImplementedException("P2R (cbuf)");
+}
+
+void TranslatorVisitor::P2R_imm(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src;
+        BitField<40, 1, Mode> mode;
+        BitField<41, 2, u64> byte_selector;
+    } const p2r{insn};
+
+    const u32 mask{GetImm20(insn).U32()};
+    const bool pr_mode{p2r.mode == Mode::PR};
+    const u32 num_items{pr_mode ? 7U : 4U};
+    const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
+    IR::U32 insert{ir.Imm32(0)};
+    for (u32 index = 0; index < num_items; ++index) {
+        if (((mask >> index) & 1) == 0) {
+            continue;
+        }
+        const IR::U1 cond{[this, index, pr_mode] {
+            if (pr_mode) {
+                return ir.GetPred(IR::Pred{index});
+            }
+            switch (index) {
+            case 0:
+                return ir.GetZFlag();
+            case 1:
+                return ir.GetSFlag();
+            case 2:
+                return ir.GetCFlag();
+            case 3:
+                return ir.GetOFlag();
+            }
+            throw LogicError("Unreachable P2R index");
+        }()};
+        const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
+        insert = ir.BitwiseOr(insert, bit);
+    }
+    const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
+    X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 6b2a1356bc..628cf1c14e 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -633,18 +633,6 @@ void TranslatorVisitor::OUT_imm(u64) {
     ThrowNotImplemented(Opcode::OUT_imm);
 }
 
-void TranslatorVisitor::P2R_reg(u64) {
-    ThrowNotImplemented(Opcode::P2R_reg);
-}
-
-void TranslatorVisitor::P2R_cbuf(u64) {
-    ThrowNotImplemented(Opcode::P2R_cbuf);
-}
-
-void TranslatorVisitor::P2R_imm(u64) {
-    ThrowNotImplemented(Opcode::P2R_imm);
-}
-
 void TranslatorVisitor::PBK() {
     // PBK is a no-op
 }
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 49ff911d6a..b25af6cd3e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -191,12 +191,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
         .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
     };
     const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)};
-
+    /*
     FILE* file = fopen("D:\\shader.spv", "wb");
     fwrite(code.data(), 4, code.size(), file);
     fclose(file);
     std::system("spirv-dis D:\\shader.spv");
-
+    */
     shader_info->unique_hash = env.ComputeHash();
     shader_info->size_bytes = env.ShaderSize();
     return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
-- 
cgit v1.2.3-70-g09d2


From 274897dfd59b4d08029ab7e93be4f84654abcdc8 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 21 Feb 2021 23:42:38 -0300
Subject: spirv: Fixes and Intel specific workarounds

---
 src/shader_recompiler/backend/spirv/emit_context.cpp     |  3 ++-
 src/shader_recompiler/backend/spirv/emit_context.h       |  5 ++++-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp       |  6 +++---
 src/shader_recompiler/backend/spirv/emit_spirv.h         |  8 ++++----
 .../backend/spirv/emit_spirv_floating_point.cpp          | 13 ++++++++++---
 .../backend/spirv/emit_spirv_logical.cpp                 | 16 ++++++++--------
 .../frontend/ir/structured_control_flow.cpp              |  3 ---
 src/shader_recompiler/frontend/maxwell/program.cpp       |  3 ---
 .../frontend/maxwell/translate/impl/impl.cpp             | 15 +++++++++------
 src/shader_recompiler/profile.h                          |  3 +++
 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp     |  1 +
 11 files changed, 44 insertions(+), 32 deletions(-)

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index ea1c8a3be1..d2dbd56d44 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -25,7 +25,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
     }
 }
 
-EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) {
+EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
+    : Sirit::Module(0x00010000), profile{profile_} {
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 8de203da2a..d20cf387ef 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -11,6 +11,7 @@
 
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/shader_info.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::SPIRV {
 
@@ -30,11 +31,13 @@ private:
 
 class EmitContext final : public Sirit::Module {
 public:
-    explicit EmitContext(IR::Program& program);
+    explicit EmitContext(const Profile& profile, IR::Program& program);
     ~EmitContext();
 
     [[nodiscard]] Id Def(const IR::Value& value);
 
+    const Profile& profile;
+
     Id void_id{};
     Id U1{};
     Id U16{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 2519e446ae..f3aca90d06 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -150,11 +150,11 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit
     } else if (info.uses_fp16_denorms_flush) {
         if (profile.support_fp16_denorm_flush) {
             ctx.AddCapability(spv::Capability::DenormFlushToZero);
-            ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
         } else {
             // Same as fp32, no need to warn as most drivers will flush by default
         }
-    } else if (info.uses_fp32_denorms_preserve) {
+    } else if (info.uses_fp16_denorms_preserve) {
         if (profile.support_fp16_denorm_preserve) {
             ctx.AddCapability(spv::Capability::DenormPreserve);
             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
@@ -166,7 +166,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit
 } // Anonymous namespace
 
 std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) {
-    EmitContext ctx{program};
+    EmitContext ctx{profile, program};
     const Id void_function{ctx.TypeFunction(ctx.void_id)};
     // FIXME: Forward declare functions (needs sirit support)
     Id func{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 922e294a78..cec80c13e6 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -202,10 +202,10 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-void EmitLogicalOr(EmitContext& ctx);
-void EmitLogicalAnd(EmitContext& ctx);
-void EmitLogicalXor(EmitContext& ctx);
-void EmitLogicalNot(EmitContext& ctx);
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalNot(EmitContext& ctx, Id value);
 Id EmitConvertS16F16(EmitContext& ctx, Id value);
 Id EmitConvertS16F32(EmitContext& ctx, Id value);
 Id EmitConvertS16F64(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index 47f87054b4..5d0b74f9b9 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -15,6 +15,13 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
     return op;
 }
 
+Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
+    if (ctx.profile.has_broken_spirv_clamp) {
+        return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
+    } else {
+        return ctx.OpFClamp(type, value, zero, one);
+    }
+}
 } // Anonymous namespace
 
 Id EmitFPAbs16(EmitContext& ctx, Id value) {
@@ -144,19 +151,19 @@ void EmitFPLog2(EmitContext&) {
 Id EmitFPSaturate16(EmitContext& ctx, Id value) {
     const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
     const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
-    return ctx.OpFClamp(ctx.F32[1], value, zero, one);
+    return Saturate(ctx, ctx.F16[1], value, zero, one);
 }
 
 Id EmitFPSaturate32(EmitContext& ctx, Id value) {
     const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})};
     const Id one{ctx.Constant(ctx.F32[1], f32{1.0})};
-    return ctx.OpFClamp(ctx.F32[1], value, zero, one);
+    return Saturate(ctx, ctx.F32[1], value, zero, one);
 }
 
 Id EmitFPSaturate64(EmitContext& ctx, Id value) {
     const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
     const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
-    return ctx.OpFClamp(ctx.F64[1], value, zero, one);
+    return Saturate(ctx, ctx.F64[1], value, zero, one);
 }
 
 Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
index c5a07252f1..bb434def2e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
@@ -6,20 +6,20 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitLogicalOr(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) {
+    return ctx.OpLogicalOr(ctx.U1, a, b);
 }
 
-void EmitLogicalAnd(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) {
+    return ctx.OpLogicalAnd(ctx.U1, a, b);
 }
 
-void EmitLogicalXor(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) {
+    return ctx.OpLogicalNotEqual(ctx.U1, a, b);
 }
 
-void EmitLogicalNot(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitLogicalNot(EmitContext& ctx, Id value) {
+    return ctx.OpLogicalNot(ctx.U1, value);
 }
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp
index d145095d14..032ac8fda4 100644
--- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp
@@ -272,11 +272,9 @@ public:
     explicit GotoPass(std::span<Block* const> blocks, ObjectPool<Statement>& stmt_pool)
         : pool{stmt_pool} {
         std::vector gotos{BuildUnorderedTreeGetGotos(blocks)};
-        fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children));
         for (const Node& goto_stmt : gotos | std::views::reverse) {
             RemoveGoto(goto_stmt);
         }
-        fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children));
     }
 
     Statement& RootStatement() noexcept {
@@ -548,7 +546,6 @@ private:
     size_t Offset(ConstNode stmt) const {
         size_t offset{0};
         if (!SearchNode(root_stmt.children, stmt, offset)) {
-            fmt::print(stdout, "{}\n", DumpTree(root_stmt.children));
             throw LogicError("Node not found in tree");
         }
         return offset;
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index ed5dbf41fd..dbfc04f75e 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -56,7 +56,6 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
             .post_order_blocks{},
         });
     }
-    fmt::print(stdout, "{}\n", IR::DumpProgram(program));
     Optimization::LowerFp16ToFp32(program);
     for (IR::Function& function : functions) {
         function.post_order_blocks = PostOrder(function.blocks);
@@ -70,8 +69,6 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
         Optimization::VerificationPass(function);
     }
     Optimization::CollectShaderInfoPass(program);
-
-    fmt::print(stdout, "{}\n", IR::DumpProgram(program));
     return program;
 }
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index be17bb0d9f..165d475b92 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -83,9 +83,12 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
         BitField<20, 19, u64> value;
         BitField<56, 1, u64> is_negative;
     } const imm{insn};
-    const s32 positive_value{static_cast<s32>(imm.value)};
-    const s32 value{imm.is_negative != 0 ? -positive_value : positive_value};
-    return ir.Imm32(value);
+    if (imm.is_negative != 0) {
+        const s64 raw{static_cast<s64>(imm.value)};
+        return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
+    } else {
+        return ir.Imm32(static_cast<u32>(imm.value));
+    }
 }
 
 IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
@@ -94,9 +97,9 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
         BitField<20, 19, u64> value;
         BitField<56, 1, u64> is_negative;
     } const imm{insn};
-    const f32 positive_value{Common::BitCast<f32>(static_cast<u32>(imm.value) << 12)};
-    const f32 value{imm.is_negative != 0 ? -positive_value : positive_value};
-    return ir.Imm32(value);
+    const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0};
+    const u32 value{static_cast<u32>(imm.value) << 12};
+    return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
 }
 
 IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 9881bebab0..917fc12511 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -15,6 +15,9 @@ struct Profile {
     bool support_fp32_denorm_preserve{};
     bool support_fp16_denorm_flush{};
     bool support_fp32_denorm_flush{};
+
+    // FClamp is broken and OpFMax + OpFMin should be used instead
+    bool has_broken_spirv_clamp{};
 };
 
 } // namespace Shader
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index b25af6cd3e..2497c2385f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -189,6 +189,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
         .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
         .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
         .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
+        .has_broken_spirv_clamp = true, // TODO: is_intel
     };
     const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)};
     /*
-- 
cgit v1.2.3-70-g09d2


From e44752ddc8804961eb84f8c225bb36d5b4c77bc1 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 22 Feb 2021 22:59:16 -0300
Subject: shader: FMUL, select, RRO, and MUFU fixes

---
 src/shader_recompiler/CMakeLists.txt               |   1 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  67 +++++--
 .../backend/spirv/emit_spirv_floating_point.cpp    | 192 +++++++++++++++++----
 .../backend/spirv/emit_spirv_select.cpp            |  21 ++-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 144 ++++++++++++++--
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  18 +-
 src/shader_recompiler/frontend/ir/opcodes.inc      |  50 +++++-
 .../maxwell/translate/impl/common_encoding.h       |   3 +-
 .../maxwell/translate/impl/floating_point_add.cpp  |   2 +-
 .../impl/floating_point_fused_multiply_add.cpp     |   4 +-
 .../impl/floating_point_multi_function.cpp         |   8 +-
 .../translate/impl/floating_point_multiply.cpp     |  42 +++--
 .../impl/floating_point_range_reduction.cpp        |  41 +++++
 .../frontend/maxwell/translate/impl/impl.cpp       |  12 +-
 .../frontend/maxwell/translate/impl/impl.h         |   5 +-
 .../maxwell/translate/impl/integer_shift_left.cpp  |   2 +-
 .../maxwell/translate/impl/not_implemented.cpp     |  12 --
 .../ir_opt/constant_propagation_pass.cpp           |   2 +-
 18 files changed, 507 insertions(+), 119 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 8025272555..5574feaa66 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
     frontend/maxwell/translate/impl/floating_point_multi_function.cpp
     frontend/maxwell/translate/impl/floating_point_multiply.cpp
+    frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
     frontend/maxwell/translate/impl/impl.cpp
     frontend/maxwell/translate/impl/impl.h
     frontend/maxwell/translate/impl/integer_add.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 1b9be445e2..130c719961 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -108,10 +108,12 @@ void EmitCompositeConstructF64x4(EmitContext& ctx);
 void EmitCompositeExtractF64x2(EmitContext& ctx);
 void EmitCompositeExtractF64x3(EmitContext& ctx);
 void EmitCompositeExtractF64x4(EmitContext& ctx);
-void EmitSelect8(EmitContext& ctx);
-void EmitSelect16(EmitContext& ctx);
-Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
-void EmitSelect64(EmitContext& ctx);
+Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 void EmitBitCastU16F16(EmitContext& ctx);
 Id EmitBitCastU32F32(EmitContext& ctx, Id value);
 void EmitBitCastU64F64(EmitContext& ctx);
@@ -149,18 +151,15 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPNeg16(EmitContext& ctx, Id value);
 Id EmitFPNeg32(EmitContext& ctx, Id value);
 Id EmitFPNeg64(EmitContext& ctx, Id value);
-void EmitFPRecip32(EmitContext& ctx);
-void EmitFPRecip64(EmitContext& ctx);
-void EmitFPRecipSqrt32(EmitContext& ctx);
-void EmitFPRecipSqrt64(EmitContext& ctx);
-void EmitFPSqrt(EmitContext& ctx);
-void EmitFPSin(EmitContext& ctx);
-void EmitFPSinNotReduced(EmitContext& ctx);
-void EmitFPExp2(EmitContext& ctx);
-void EmitFPExp2NotReduced(EmitContext& ctx);
-void EmitFPCos(EmitContext& ctx);
-void EmitFPCosNotReduced(EmitContext& ctx);
-void EmitFPLog2(EmitContext& ctx);
+Id EmitFPSin(EmitContext& ctx, Id value);
+Id EmitFPCos(EmitContext& ctx, Id value);
+Id EmitFPExp2(EmitContext& ctx, Id value);
+Id EmitFPLog2(EmitContext& ctx, Id value);
+Id EmitFPRecip32(EmitContext& ctx, Id value);
+Id EmitFPRecip64(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
+Id EmitFPSqrt(EmitContext& ctx, Id value);
 Id EmitFPSaturate16(EmitContext& ctx, Id value);
 Id EmitFPSaturate32(EmitContext& ctx, Id value);
 Id EmitFPSaturate64(EmitContext& ctx, Id value);
@@ -176,6 +175,42 @@ Id EmitFPCeil64(EmitContext& ctx, Id value);
 Id EmitFPTrunc16(EmitContext& ctx, Id value);
 Id EmitFPTrunc32(EmitContext& ctx, Id value);
 Id EmitFPTrunc64(EmitContext& ctx, Id value);
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
 Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 void EmitIAdd64(EmitContext& ctx);
 Id EmitISub32(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index 5d0b74f9b9..749f117420 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -100,52 +100,40 @@ Id EmitFPNeg64(EmitContext& ctx, Id value) {
     return ctx.OpFNegate(ctx.F64[1], value);
 }
 
-void EmitFPRecip32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitFPRecip64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPSin(EmitContext& ctx, Id value) {
+    return ctx.OpSin(ctx.F32[1], value);
 }
 
-void EmitFPRecipSqrt32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPCos(EmitContext& ctx, Id value) {
+    return ctx.OpCos(ctx.F32[1], value);
 }
 
-void EmitFPRecipSqrt64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPExp2(EmitContext& ctx, Id value) {
+    return ctx.OpExp2(ctx.F32[1], value);
 }
 
-void EmitFPSqrt(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPLog2(EmitContext& ctx, Id value) {
+    return ctx.OpLog2(ctx.F32[1], value);
 }
 
-void EmitFPSin(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPRecip32(EmitContext& ctx, Id value) {
+    return ctx.OpFDiv(ctx.F32[1], ctx.Constant(ctx.F32[1], 1.0f), value);
 }
 
-void EmitFPSinNotReduced(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPRecip64(EmitContext& ctx, Id value) {
+    return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value);
 }
 
-void EmitFPExp2(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) {
+    return ctx.OpInverseSqrt(ctx.F32[1], value);
 }
 
-void EmitFPExp2NotReduced(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) {
+    return ctx.OpInverseSqrt(ctx.F64[1], value);
 }
 
-void EmitFPCos(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitFPCosNotReduced(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
-void EmitFPLog2(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitFPSqrt(EmitContext& ctx, Id value) {
+    return ctx.OpSqrt(ctx.F32[1], value);
 }
 
 Id EmitFPSaturate16(EmitContext& ctx, Id value) {
@@ -214,4 +202,148 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) {
     return ctx.OpTrunc(ctx.F64[1], value);
 }
 
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+    return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
index eb1926a4db..21cca44556 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -6,20 +6,29 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitSelect8(EmitContext&) {
+Id EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Id cond,
+                [[maybe_unused]] Id true_value, [[maybe_unused]] Id false_value) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitSelect16(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+    return ctx.OpSelect(ctx.U16, cond, true_value, false_value);
 }
 
-Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
     return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);
 }
 
-void EmitSelect64(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+    return ctx.OpSelect(ctx.U64, cond, true_value, false_value);
+}
+
+Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+    return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value);
+}
+
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+    return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value);
 }
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 34c2f67fb3..8ba86e614e 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -361,19 +361,21 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
     }
 }
 
-UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) {
+Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
     if (true_value.Type() != false_value.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
     }
     switch (true_value.Type()) {
     case Type::U8:
-        return Inst<UAny>(Opcode::Select8, condition, true_value, false_value);
+        return Inst(Opcode::SelectU8, condition, true_value, false_value);
     case Type::U16:
-        return Inst<UAny>(Opcode::Select16, condition, true_value, false_value);
+        return Inst(Opcode::SelectU16, condition, true_value, false_value);
     case Type::U32:
-        return Inst<UAny>(Opcode::Select32, condition, true_value, false_value);
+        return Inst(Opcode::SelectU32, condition, true_value, false_value);
     case Type::U64:
-        return Inst<UAny>(Opcode::Select64, condition, true_value, false_value);
+        return Inst(Opcode::SelectU64, condition, true_value, false_value);
+    case Type::F32:
+        return Inst(Opcode::SelectF32, condition, true_value, false_value);
     default:
         throw InvalidArgument("Invalid type {}", true_value.Type());
     }
@@ -503,12 +505,16 @@ F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
     return result;
 }
 
-F32 IREmitter::FPCosNotReduced(const F32& value) {
-    return Inst<F32>(Opcode::FPCosNotReduced, value);
+F32 IREmitter::FPCos(const F32& value) {
+    return Inst<F32>(Opcode::FPCos, value);
+}
+
+F32 IREmitter::FPSin(const F32& value) {
+    return Inst<F32>(Opcode::FPSin, value);
 }
 
-F32 IREmitter::FPExp2NotReduced(const F32& value) {
-    return Inst<F32>(Opcode::FPExp2NotReduced, value);
+F32 IREmitter::FPExp2(const F32& value) {
+    return Inst<F32>(Opcode::FPExp2, value);
 }
 
 F32 IREmitter::FPLog2(const F32& value) {
@@ -517,9 +523,9 @@ F32 IREmitter::FPLog2(const F32& value) {
 
 F32F64 IREmitter::FPRecip(const F32F64& value) {
     switch (value.Type()) {
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPRecip32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPRecip64, value);
     default:
         ThrowInvalidType(value.Type());
@@ -528,19 +534,15 @@ F32F64 IREmitter::FPRecip(const F32F64& value) {
 
 F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
     switch (value.Type()) {
-    case Type::U32:
+    case Type::F32:
         return Inst<F32>(Opcode::FPRecipSqrt32, value);
-    case Type::U64:
+    case Type::F64:
         return Inst<F64>(Opcode::FPRecipSqrt64, value);
     default:
         ThrowInvalidType(value.Type());
     }
 }
 
-F32 IREmitter::FPSinNotReduced(const F32& value) {
-    return Inst<F32>(Opcode::FPSinNotReduced, value);
-}
-
 F32 IREmitter::FPSqrt(const F32& value) {
     return Inst<F32>(Opcode::FPSqrt, value);
 }
@@ -610,6 +612,114 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
     }
 }
 
+U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::F16:
+        return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::F16:
+        return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::F16:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::F16:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs,
+                        rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs,
+                        rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs,
+                        rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::F16:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
+                        lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
+                        lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
+                        lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::F16:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
+                                : Opcode::FPUnordGreaterThanEqual16,
+                        lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
+                                : Opcode::FPUnordGreaterThanEqual32,
+                        lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
+                                : Opcode::FPUnordGreaterThanEqual64,
+                        lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
 U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
     if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 959f4f9dac..2c923716a3 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -98,7 +98,8 @@ public:
                                            const Value& e4);
     [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
 
-    [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value);
+    [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
+                               const Value& false_value);
 
     template <typename Dest, typename Source>
     [[nodiscard]] Dest BitCast(const Source& value);
@@ -121,12 +122,12 @@ public:
     [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
     [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
 
-    [[nodiscard]] F32 FPCosNotReduced(const F32& value);
-    [[nodiscard]] F32 FPExp2NotReduced(const F32& value);
+    [[nodiscard]] F32 FPCos(const F32& value);
+    [[nodiscard]] F32 FPSin(const F32& value);
+    [[nodiscard]] F32 FPExp2(const F32& value);
     [[nodiscard]] F32 FPLog2(const F32& value);
     [[nodiscard]] F32F64 FPRecip(const F32F64& value);
     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
-    [[nodiscard]] F32 FPSinNotReduced(const F32& value);
     [[nodiscard]] F32 FPSqrt(const F32& value);
     [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
@@ -134,6 +135,15 @@ public:
     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
     [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});
 
+    [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
+    [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
+    [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
+    [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
+    [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
+                                     bool ordered = true);
+    [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
+                                        bool ordered = true);
+
     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 50da77535e..f2d71144a1 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -103,10 +103,12 @@ OPCODE(CompositeExtractF64x3,                               F64,            F64x
 OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                            )
 
 // Select operations
-OPCODE(Select8,                                             U8,             U1,             U8,             U8,                             )
-OPCODE(Select16,                                            U16,            U1,             U16,            U16,                            )
-OPCODE(Select32,                                            U32,            U1,             U32,            U32,                            )
-OPCODE(Select64,                                            U64,            U1,             U64,            U64,                            )
+OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                             )
+OPCODE(SelectU16,                                           U16,            U1,             U16,            U16,                            )
+OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                            )
+OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                            )
+OPCODE(SelectF16,                                           F16,            U1,             F16,            F16,                            )
+OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                            )
 
 // Bitwise conversions
 OPCODE(BitCastU16F16,                                       U16,            F16,                                                            )
@@ -156,11 +158,8 @@ OPCODE(FPRecipSqrt32,                                       F32,            F32,
 OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                            )
 OPCODE(FPSqrt,                                              F32,            F32,                                                            )
 OPCODE(FPSin,                                               F32,            F32,                                                            )
-OPCODE(FPSinNotReduced,                                     F32,            F32,                                                            )
 OPCODE(FPExp2,                                              F32,            F32,                                                            )
-OPCODE(FPExp2NotReduced,                                    F32,            F32,                                                            )
 OPCODE(FPCos,                                               F32,            F32,                                                            )
-OPCODE(FPCosNotReduced,                                     F32,            F32,                                                            )
 OPCODE(FPLog2,                                              F32,            F32,                                                            )
 OPCODE(FPSaturate16,                                        F16,            F16,                                                            )
 OPCODE(FPSaturate32,                                        F32,            F32,                                                            )
@@ -178,6 +177,43 @@ OPCODE(FPTrunc16,                                           F16,            F16,
 OPCODE(FPTrunc32,                                           F32,            F32,                                                            )
 OPCODE(FPTrunc64,                                           F64,            F64,                                                            )
 
+OPCODE(FPOrdEqual16,                                        U1,             F16,            F16,                                            )
+OPCODE(FPOrdEqual32,                                        U1,             F32,            F32,                                            )
+OPCODE(FPOrdEqual64,                                        U1,             F64,            F64,                                            )
+OPCODE(FPUnordEqual16,                                      U1,             F16,            F16,                                            )
+OPCODE(FPUnordEqual32,                                      U1,             F32,            F32,                                            )
+OPCODE(FPUnordEqual64,                                      U1,             F64,            F64,                                            )
+OPCODE(FPOrdNotEqual16,                                     U1,             F16,            F16,                                            )
+OPCODE(FPOrdNotEqual32,                                     U1,             F32,            F32,                                            )
+OPCODE(FPOrdNotEqual64,                                     U1,             F64,            F64,                                            )
+OPCODE(FPUnordNotEqual16,                                   U1,             F16,            F16,                                            )
+OPCODE(FPUnordNotEqual32,                                   U1,             F32,            F32,                                            )
+OPCODE(FPUnordNotEqual64,                                   U1,             F64,            F64,                                            )
+OPCODE(FPOrdLessThan16,                                     U1,             F16,            F16,                                            )
+OPCODE(FPOrdLessThan32,                                     U1,             F32,            F32,                                            )
+OPCODE(FPOrdLessThan64,                                     U1,             F64,            F64,                                            )
+OPCODE(FPUnordLessThan16,                                   U1,             F16,            F16,                                            )
+OPCODE(FPUnordLessThan32,                                   U1,             F32,            F32,                                            )
+OPCODE(FPUnordLessThan64,                                   U1,             F64,            F64,                                            )
+OPCODE(FPOrdGreaterThan16,                                  U1,             F16,            F16,                                            )
+OPCODE(FPOrdGreaterThan32,                                  U1,             F32,            F32,                                            )
+OPCODE(FPOrdGreaterThan64,                                  U1,             F64,            F64,                                            )
+OPCODE(FPUnordGreaterThan16,                                U1,             F16,            F16,                                            )
+OPCODE(FPUnordGreaterThan32,                                U1,             F32,            F32,                                            )
+OPCODE(FPUnordGreaterThan64,                                U1,             F64,            F64,                                            )
+OPCODE(FPOrdLessThanEqual16,                                U1,             F16,            F16,                                            )
+OPCODE(FPOrdLessThanEqual32,                                U1,             F32,            F32,                                            )
+OPCODE(FPOrdLessThanEqual64,                                U1,             F64,            F64,                                            )
+OPCODE(FPUnordLessThanEqual16,                              U1,             F16,            F16,                                            )
+OPCODE(FPUnordLessThanEqual32,                              U1,             F32,            F32,                                            )
+OPCODE(FPUnordLessThanEqual64,                              U1,             F64,            F64,                                            )
+OPCODE(FPOrdGreaterThanEqual16,                             U1,             F16,            F16,                                            )
+OPCODE(FPOrdGreaterThanEqual32,                             U1,             F32,            F32,                                            )
+OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,            F64,                                            )
+OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                            )
+OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                            )
+OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                            )
+
 // Integer operations
 OPCODE(IAdd32,                                              U32,            U32,            U32,                                            )
 OPCODE(IAdd64,                                              U64,            U64,            U64,                                            )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
index 3da37a2bb5..fd73f656c5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -46,7 +46,8 @@ inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
     case FmzMode::FTZ:
         return IR::FmzMode::FTZ;
     case FmzMode::FMZ:
-        return IR::FmzMode::FMZ;
+        // FMZ is manually handled in the instruction
+        return IR::FmzMode::FTZ;
     case FmzMode::INVALIDFMZ3:
         break;
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
index 219ffcc6af..76a807d4ed 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
 } // Anonymous namespace
 
 void TranslatorVisitor::FADD_reg(u64 insn) {
-    FADD(*this, insn, GetRegFloat20(insn));
+    FADD(*this, insn, GetFloatReg20(insn));
 }
 
 void TranslatorVisitor::FADD_cbuf(u64 insn) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
index 758700d3c4..c2ca0873b7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s
 } // Anonymous namespace
 
 void TranslatorVisitor::FFMA_reg(u64 insn) {
-    FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn));
+    FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
 }
 
 void TranslatorVisitor::FFMA_rc(u64) {
@@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) {
 }
 
 void TranslatorVisitor::FFMA_cr(u64 insn) {
-    FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn));
+    FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
 }
 
 void TranslatorVisitor::FFMA_imm(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
index ba005fbf45..2f86056194 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -10,7 +10,7 @@
 
 namespace Shader::Maxwell {
 namespace {
-enum class Operation {
+enum class Operation : u64 {
     Cos = 0,
     Sin = 1,
     Ex2 = 2,    // Base 2 exponent
@@ -39,11 +39,11 @@ void TranslatorVisitor::MUFU(u64 insn) {
     IR::F32 value{[&]() -> IR::F32 {
         switch (mufu.operation) {
         case Operation::Cos:
-            return ir.FPCosNotReduced(op_a);
+            return ir.FPCos(op_a);
         case Operation::Sin:
-            return ir.FPSinNotReduced(op_a);
+            return ir.FPSin(op_a);
         case Operation::Ex2:
-            return ir.FPExp2NotReduced(op_a);
+            return ir.FPExp2(op_a);
         case Operation::Lg2:
             return ir.FPLog2(op_a);
         case Operation::Rcp:
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
index 5c38d3fc11..edf2cadaea 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -55,9 +55,6 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode
     if (cc) {
         throw NotImplementedException("FMUL CC");
     }
-    if (sat) {
-        throw NotImplementedException("FMUL SAT");
-    }
     IR::F32 op_a{v.F(fmul.src_a)};
     if (scale != Scale::None) {
         if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
@@ -71,7 +68,20 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode
         .rounding{CastFpRounding(fp_rounding)},
         .fmz_mode{CastFmzMode(fmz_mode)},
     };
-    v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control));
+    IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
+    if (fmz_mode == FmzMode::FMZ && !sat) {
+        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+        const IR::F32 zero{v.ir.Imm32(0.0f)};
+        const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+        const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+        const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+        value = IR::F32{v.ir.Select(any_zero, zero, value)};
+    }
+    if (sat) {
+        value = v.ir.FPSaturate(value);
+    }
+    v.F(fmul.dest_reg, value);
 }
 
 void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
@@ -83,27 +93,33 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
         BitField<47, 1, u64> cc;
         BitField<48, 1, u64> neg_b;
         BitField<50, 1, u64> sat;
-    } fmul{insn};
-
+    } const fmul{insn};
     FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
          fmul.neg_b != 0);
 }
 } // Anonymous namespace
 
 void TranslatorVisitor::FMUL_reg(u64 insn) {
-    return FMUL(*this, insn, GetRegFloat20(insn));
+    return FMUL(*this, insn, GetFloatReg20(insn));
 }
 
-void TranslatorVisitor::FMUL_cbuf(u64) {
-    throw NotImplementedException("FMUL (cbuf)");
+void TranslatorVisitor::FMUL_cbuf(u64 insn) {
+    return FMUL(*this, insn, GetFloatCbuf(insn));
 }
 
-void TranslatorVisitor::FMUL_imm(u64) {
-    throw NotImplementedException("FMUL (imm)");
+void TranslatorVisitor::FMUL_imm(u64 insn) {
+    return FMUL(*this, insn, GetFloatImm20(insn));
 }
 
-void TranslatorVisitor::FMUL32I(u64) {
-    throw NotImplementedException("FMUL32I");
+void TranslatorVisitor::FMUL32I(u64 insn) {
+    union {
+        u64 raw;
+        BitField<52, 1, u64> cc;
+        BitField<53, 2, FmzMode> fmz;
+        BitField<55, 1, u64> sat;
+    } const fmul32i{insn};
+    FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
+         fmul32i.sat != 0, fmul32i.cc != 0, false);
 }
 
 } // namespace Shader::Maxwell
\ No newline at end of file
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 0000000000..f91b93fad1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+    SINCOS,
+    EX2,
+};
+
+void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<39, 1, Mode> mode;
+        BitField<45, 1, u64> neg;
+        BitField<49, 1, u64> abs;
+    } const rro{insn};
+
+    v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::RRO_reg(u64 insn) {
+    RRO(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::RRO_cbuf(u64 insn) {
+    RRO(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::RRO_imm(u64) {
+    throw NotImplementedException("RRO (imm)");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 165d475b92..a5a0e1a9b2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
     return X(reg.index);
 }
 
-IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) {
+IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
     return ir.BitCast<IR::F32>(GetReg20(insn));
 }
 
-IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) {
+IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
     return ir.BitCast<IR::F32>(GetReg39(insn));
 }
 
@@ -110,6 +110,14 @@ IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
     return ir.Imm32(static_cast<u32>(imm.value));
 }
 
+IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 32, u64> value;
+    } const imm{insn};
+    return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
+}
+
 void TranslatorVisitor::SetZFlag(const IR::U1& value) {
     ir.SetZFlag(value);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 4d4cf2ebfb..4e722e2059 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -304,8 +304,8 @@ public:
     [[nodiscard]] IR::U32 GetReg8(u64 insn);
     [[nodiscard]] IR::U32 GetReg20(u64 insn);
     [[nodiscard]] IR::U32 GetReg39(u64 insn);
-    [[nodiscard]] IR::F32 GetRegFloat20(u64 insn);
-    [[nodiscard]] IR::F32 GetRegFloat39(u64 insn);
+    [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
+    [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
 
     [[nodiscard]] IR::U32 GetCbuf(u64 insn);
     [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
@@ -314,6 +314,7 @@ public:
     [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm32(u64 insn);
+    [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
 
     void SetZFlag(const IR::U1& value);
     void SetSFlag(const IR::U1& value);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
index d8a5158b59..20af68852a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -50,7 +50,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
         //
         const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
         const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
-        result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0));
+        result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
     }
     v.X(shl.dest_reg, result);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 628cf1c14e..4114e10bea 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -721,18 +721,6 @@ void TranslatorVisitor::RET(u64) {
     ThrowNotImplemented(Opcode::RET);
 }
 
-void TranslatorVisitor::RRO_reg(u64) {
-    ThrowNotImplemented(Opcode::RRO_reg);
-}
-
-void TranslatorVisitor::RRO_cbuf(u64) {
-    ThrowNotImplemented(Opcode::RRO_cbuf);
-}
-
-void TranslatorVisitor::RRO_imm(u64) {
-    ThrowNotImplemented(Opcode::RRO_imm);
-}
-
 void TranslatorVisitor::RTT(u64) {
     ThrowNotImplemented(Opcode::RTT);
 }
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 4d4e88259a..ae3d5a7d6d 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -330,7 +330,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
         return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32);
     case IR::Opcode::IAdd64:
         return FoldAdd<u64>(block, inst);
-    case IR::Opcode::Select32:
+    case IR::Opcode::SelectU32:
         return FoldSelect<u32>(inst);
     case IR::Opcode::LogicalAnd:
         return FoldLogicalAnd(inst);
-- 
cgit v1.2.3-70-g09d2


From 3a63fa0477ea8297c80133d35494e1dfdc012f95 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 9 Mar 2021 17:14:57 -0300
Subject: shader: Partial implementation of LDC

---
 src/shader_recompiler/CMakeLists.txt               |   1 +
 .../backend/spirv/emit_context.cpp                 |  63 +++++++---
 src/shader_recompiler/backend/spirv/emit_context.h |  22 +++-
 src/shader_recompiler/backend/spirv/emit_spirv.h   |   8 +-
 .../backend/spirv/emit_spirv_context_get_set.cpp   |  56 ++++++++-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  22 +++-
 src/shader_recompiler/frontend/ir/ir_emitter.h     |   3 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |   8 +-
 .../frontend/maxwell/translate/impl/impl.cpp       |  16 ++-
 .../maxwell/translate/impl/load_constant.cpp       |  85 +++++++++++++
 .../maxwell/translate/impl/not_implemented.cpp     |   4 -
 .../ir_opt/collect_shader_info_pass.cpp            | 135 +++++++++++++++++++--
 .../ir_opt/constant_propagation_pass.cpp           |  22 +++-
 .../global_memory_to_storage_buffer_pass.cpp       |   2 +-
 src/shader_recompiler/ir_opt/texture_pass.cpp      |   2 +-
 src/shader_recompiler/shader_info.h                |   6 +
 16 files changed, 405 insertions(+), 50 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index fa268d38fc..755db5dfa0 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -88,6 +88,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/integer_shift_right.cpp
     frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
     frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
+    frontend/maxwell/translate/impl/load_constant.cpp
     frontend/maxwell/translate/impl/load_effective_address.cpp
     frontend/maxwell/translate/impl/load_store_attribute.cpp
     frontend/maxwell/translate/impl/load_store_memory.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 21900d3878..278b26b503 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -104,15 +104,23 @@ void EmitContext::DefineCommonTypes(const Info& info) {
 
     U1 = Name(TypeBool(), "u1");
 
-    // TODO: Conditionally define these
-    AddCapability(spv::Capability::Int16);
-    AddCapability(spv::Capability::Int64);
-    U16 = Name(TypeInt(16, false), "u16");
-    U64 = Name(TypeInt(64, false), "u64");
-
     F32.Define(*this, TypeFloat(32), "f32");
     U32.Define(*this, TypeInt(32, false), "u32");
 
+    if (info.uses_int8) {
+        AddCapability(spv::Capability::Int8);
+        U8 = Name(TypeInt(8, false), "u8");
+        S8 = Name(TypeInt(8, true), "s8");
+    }
+    if (info.uses_int16) {
+        AddCapability(spv::Capability::Int16);
+        U16 = Name(TypeInt(16, false), "u16");
+        S16 = Name(TypeInt(16, true), "s16");
+    }
+    if (info.uses_int64) {
+        AddCapability(spv::Capability::Int64);
+        U64 = Name(TypeInt(64, false), "u64");
+    }
     if (info.uses_fp16) {
         AddCapability(spv::Capability::Float16);
         F16.Define(*this, TypeFloat(16), "f16");
@@ -151,26 +159,51 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
     if (info.constant_buffer_descriptors.empty()) {
         return;
     }
-    const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))};
-    Decorate(array_type, spv::Decoration::ArrayStride, 4U);
+    if (True(info.used_constant_buffer_types & IR::Type::U8)) {
+        DefineConstantBuffers(info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8));
+        DefineConstantBuffers(info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8));
+    }
+    if (True(info.used_constant_buffer_types & IR::Type::U16)) {
+        DefineConstantBuffers(info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16));
+        DefineConstantBuffers(info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16));
+    }
+    if (True(info.used_constant_buffer_types & IR::Type::U32)) {
+        DefineConstantBuffers(info, &UniformDefinitions::U32, binding, U32[1], 'u', sizeof(u32));
+    }
+    if (True(info.used_constant_buffer_types & IR::Type::F32)) {
+        DefineConstantBuffers(info, &UniformDefinitions::F32, binding, F32[1], 'f', sizeof(f32));
+    }
+    if (True(info.used_constant_buffer_types & IR::Type::U64)) {
+        DefineConstantBuffers(info, &UniformDefinitions::U64, binding, U64, 'u', sizeof(u64));
+    }
+    for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
+        binding += desc.count;
+    }
+}
+
+void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type,
+                                        u32 binding, Id type, char type_char, u32 element_size) {
+    const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))};
+    Decorate(array_type, spv::Decoration::ArrayStride, element_size);
 
     const Id struct_type{TypeStruct(array_type)};
-    Name(struct_type, "cbuf_block");
+    Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT));
     Decorate(struct_type, spv::Decoration::Block);
     MemberName(struct_type, 0, "data");
     MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
 
-    const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)};
-    uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]);
+    const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)};
+    const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)};
+    uniform_types.*member_type = uniform_type;
 
-    u32 index{};
     for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
-        const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
+        const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)};
         Decorate(id, spv::Decoration::Binding, binding);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("c{}", desc.index));
-        std::fill_n(cbufs.data() + desc.index, desc.count, id);
-        index += desc.count;
+        for (size_t i = 0; i < desc.count; ++i) {
+            cbufs[desc.index + i].*member_type = id;
+        }
         binding += desc.count;
     }
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 8b3109eb8c..35eca258a8 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -10,8 +10,8 @@
 #include <sirit/sirit.h>
 
 #include "shader_recompiler/frontend/ir/program.h"
-#include "shader_recompiler/shader_info.h"
 #include "shader_recompiler/profile.h"
+#include "shader_recompiler/shader_info.h"
 
 namespace Shader::Backend::SPIRV {
 
@@ -34,6 +34,16 @@ struct TextureDefinition {
     Id type;
 };
 
+struct UniformDefinitions {
+    Id U8{};
+    Id S8{};
+    Id U16{};
+    Id S16{};
+    Id U32{};
+    Id F32{};
+    Id U64{};
+};
+
 class EmitContext final : public Sirit::Module {
 public:
     explicit EmitContext(const Profile& profile, IR::Program& program);
@@ -45,7 +55,10 @@ public:
 
     Id void_id{};
     Id U1{};
+    Id U8{};
+    Id S8{};
     Id U16{};
+    Id S16{};
     Id U64{};
     VectorTypes F32;
     VectorTypes U32;
@@ -56,10 +69,11 @@ public:
     Id false_value{};
     Id u32_zero_value{};
 
-    Id uniform_u32{};
+    UniformDefinitions uniform_types;
+
     Id storage_u32{};
 
-    std::array<Id, Info::MAX_CBUFS> cbufs{};
+    std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
     std::array<Id, Info::MAX_SSBOS> ssbos{};
     std::vector<TextureDefinition> textures;
 
@@ -71,6 +85,8 @@ private:
     void DefineCommonConstants();
     void DefineSpecialVariables(const Info& info);
     void DefineConstantBuffers(const Info& info, u32& binding);
+    void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding,
+                               Id type, char type_char, u32 element_size);
     void DefineStorageBuffers(const Info& info, u32& binding);
     void DefineTextures(const Info& info, u32& binding);
     void DefineLabels(IR::Program& program);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 69698c478e..aafc59bbbf 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -34,7 +34,13 @@ void EmitGetPred(EmitContext& ctx);
 void EmitSetPred(EmitContext& ctx);
 void EmitSetGotoVariable(EmitContext& ctx);
 void EmitGetGotoVariable(EmitContext& ctx);
-Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 void EmitGetAttribute(EmitContext& ctx);
 void EmitSetAttribute(EmitContext& ctx);
 void EmitGetAttributeIndexed(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index eb9c01c5a5..125b58cf74 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -30,17 +30,61 @@ void EmitGetGotoVariable(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr,
+                  u32 element_size, const IR::Value& binding, const IR::Value& offset) {
     if (!binding.IsImmediate()) {
         throw NotImplementedException("Constant buffer indexing");
     }
+    const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
+    const Id uniform_type{ctx.uniform_types.*member_ptr};
     if (!offset.IsImmediate()) {
-        throw NotImplementedException("Variable constant buffer offset");
+        Id index{ctx.Def(offset)};
+        if (element_size > 1) {
+            const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
+            const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)};
+            index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
+        }
+        const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
+        return ctx.OpLoad(result_type, access_chain);
     }
-    const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / 4)};
-    const Id cbuf{ctx.cbufs[binding.U32()]};
-    const Id access_chain{ctx.OpAccessChain(ctx.uniform_u32, cbuf, ctx.u32_zero_value, imm_offset)};
-    return ctx.OpLoad(ctx.U32[1], access_chain);
+    if (offset.U32() % element_size != 0) {
+        throw NotImplementedException("Unaligned immediate constant buffer load");
+    }
+    const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)};
+    const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
+    return ctx.OpLoad(result_type, access_chain);
+}
+
+Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+    const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
+    return ctx.OpUConvert(ctx.U32[1], load);
+}
+
+Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+    const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)};
+    return ctx.OpSConvert(ctx.U32[1], load);
+}
+
+Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+    const Id load{GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)};
+    return ctx.OpUConvert(ctx.U32[1], load);
+}
+
+Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+    const Id load{GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)};
+    return ctx.OpSConvert(ctx.U32[1], load);
+}
+
+Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+    return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset);
+}
+
+Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+    return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
+}
+
+Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+    return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset);
 }
 
 void EmitGetAttribute(EmitContext&) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index ae3354c669..33819dd36d 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -112,7 +112,27 @@ void IREmitter::SetPred(IR::Pred pred, const U1& value) {
 }
 
 U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) {
-    return Inst<U32>(Opcode::GetCbuf, binding, byte_offset);
+    return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
+}
+
+UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
+                        bool is_signed) {
+    switch (bitsize) {
+    case 8:
+        return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset);
+    case 16:
+        return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset);
+    case 32:
+        return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
+    case 64:
+        return Inst<U64>(Opcode::GetCbufU64, binding, byte_offset);
+    default:
+        throw InvalidArgument("Invalid bit size {}", bitsize);
+    }
+}
+
+F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) {
+    return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset);
 }
 
 U1 IREmitter::GetZFlag() {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index cb2a7710a1..e4d1105401 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -47,6 +47,9 @@ public:
     void SetGotoVariable(u32 id, const U1& value);
 
     [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
+    [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
+                               bool is_signed);
+    [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);
 
     [[nodiscard]] U1 GetZFlag();
     [[nodiscard]] U1 GetSFlag();
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index aa011fab1b..64bd495ed4 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -21,7 +21,13 @@ OPCODE(GetPred,                                             U1,             Pred
 OPCODE(SetPred,                                             Void,           Pred,           U1,                                                             )
 OPCODE(GetGotoVariable,                                     U1,             U32,                                                                            )
 OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                                             )
-OPCODE(GetCbuf,                                             U32,            U32,            U32,                                                            )
+OPCODE(GetCbufU8,                                           U32,            U32,            U32,                                                            )
+OPCODE(GetCbufS8,                                           U32,            U32,            U32,                                                            )
+OPCODE(GetCbufU16,                                          U32,            U32,            U32,                                                            )
+OPCODE(GetCbufS16,                                          U32,            U32,            U32,                                                            )
+OPCODE(GetCbufU32,                                          U32,            U32,            U32,                                                            )
+OPCODE(GetCbufF32,                                          F32,            U32,            U32,                                                            )
+OPCODE(GetCbufU64,                                          U64,            U32,            U32,                                                            )
 OPCODE(GetAttribute,                                        U32,            Attribute,                                                                      )
 OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                                            )
 OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                                            )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index a5a0e1a9b2..7564aeeb24 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -56,25 +56,32 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
     return ir.BitCast<IR::F32>(GetReg39(insn));
 }
 
-IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
     union {
         u64 raw;
         BitField<20, 14, s64> offset;
         BitField<34, 5, u64> binding;
     } const cbuf{insn};
+
     if (cbuf.binding >= 18) {
         throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
     }
     if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) {
         throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
     }
-    const IR::U32 binding{ir.Imm32(static_cast<u32>(cbuf.binding))};
-    const IR::U32 byte_offset{ir.Imm32(static_cast<u32>(cbuf.offset) * 4)};
+    const IR::Value binding{static_cast<u32>(cbuf.binding)};
+    const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
+    return {IR::U32{binding}, IR::U32{byte_offset}};
+}
+
+IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+    const auto [binding, byte_offset]{CbufAddr(insn)};
     return ir.GetCbuf(binding, byte_offset);
 }
 
 IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
-    return ir.BitCast<IR::F32>(GetCbuf(insn));
+    const auto [binding, byte_offset]{CbufAddr(insn)};
+    return ir.GetFloatCbuf(binding, byte_offset);
 }
 
 IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
@@ -83,6 +90,7 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
         BitField<20, 19, u64> value;
         BitField<56, 1, u64> is_negative;
     } const imm{insn};
+
     if (imm.is_negative != 0) {
         const s64 raw{static_cast<s64>(imm.value)};
         return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 0000000000..39becf93c2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,85 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+    Default,
+    IL,
+    IS,
+    ISL,
+};
+
+enum class Size : u64 {
+    U8,
+    S8,
+    U16,
+    S16,
+    B32,
+    B64,
+};
+
+std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
+                                 const IR::U32& reg, const IR::U32& imm) {
+    switch (mode) {
+    case Mode::Default:
+        return {imm_index, ir.IAdd(reg, imm)};
+    default:
+        break;
+    }
+    throw NotImplementedException("Mode {}", mode);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDC(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_reg;
+        BitField<20, 16, s64> offset;
+        BitField<36, 5, u64> index;
+        BitField<44, 2, Mode> mode;
+        BitField<48, 3, Size> size;
+    } const ldc{insn};
+
+    const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
+    const IR::U32 reg{X(ldc.src_reg)};
+    const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
+    const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
+    switch (ldc.size) {
+    case Size::U8:
+        X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false));
+        break;
+    case Size::S8:
+        X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true));
+        break;
+    case Size::U16:
+        X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false));
+        break;
+    case Size::S16:
+        X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true));
+        break;
+    case Size::B32:
+        X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false));
+        break;
+    case Size::B64: {
+        if (!IR::IsAligned(ldc.dest_reg, 2)) {
+            throw NotImplementedException("Unaligned destination register");
+        }
+        const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, offset, 64, false))};
+        for (int i = 0; i < 2; ++i) {
+            X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
+        }
+        break;
+    }
+    default:
+        throw NotImplementedException("Invalid size {}", ldc.size.Value());
+    }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index ff429c1263..5b153acffa 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -425,10 +425,6 @@ void TranslatorVisitor::LD(u64) {
     ThrowNotImplemented(Opcode::LD);
 }
 
-void TranslatorVisitor::LDC(u64) {
-    ThrowNotImplemented(Opcode::LDC);
-}
-
 void TranslatorVisitor::LDL(u64) {
     ThrowNotImplemented(Opcode::LDL);
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 960beadd43..cdbe852213 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -25,18 +25,13 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
 
 void VisitUsages(Info& info, IR::Inst& inst) {
     switch (inst.Opcode()) {
-    case IR::Opcode::WorkgroupId:
-        info.uses_workgroup_id = true;
-        break;
-    case IR::Opcode::LocalInvocationId:
-        info.uses_local_invocation_id = true;
-        break;
     case IR::Opcode::CompositeConstructF16x2:
     case IR::Opcode::CompositeConstructF16x3:
     case IR::Opcode::CompositeConstructF16x4:
     case IR::Opcode::CompositeExtractF16x2:
     case IR::Opcode::CompositeExtractF16x3:
     case IR::Opcode::CompositeExtractF16x4:
+    case IR::Opcode::SelectF16:
     case IR::Opcode::BitCastU16F16:
     case IR::Opcode::BitCastF16U16:
     case IR::Opcode::PackFloat2x16:
@@ -75,13 +70,139 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPTrunc64:
         info.uses_fp64 = true;
         break;
-    case IR::Opcode::GetCbuf:
+    default:
+        break;
+    }
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetCbufU8:
+    case IR::Opcode::GetCbufS8:
+    case IR::Opcode::UndefU8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::LoadStorageU8:
+    case IR::Opcode::LoadStorageS8:
+    case IR::Opcode::WriteStorageU8:
+    case IR::Opcode::WriteStorageS8:
+    case IR::Opcode::SelectU8:
+        info.uses_int8 = true;
+        break;
+    default:
+        break;
+    }
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetCbufU16:
+    case IR::Opcode::GetCbufS16:
+    case IR::Opcode::UndefU16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::LoadStorageU16:
+    case IR::Opcode::LoadStorageS16:
+    case IR::Opcode::WriteStorageU16:
+    case IR::Opcode::WriteStorageS16:
+    case IR::Opcode::SelectU16:
+    case IR::Opcode::BitCastU16F16:
+    case IR::Opcode::BitCastF16U16:
+    case IR::Opcode::ConvertS16F16:
+    case IR::Opcode::ConvertS16F32:
+    case IR::Opcode::ConvertS16F64:
+    case IR::Opcode::ConvertU16F16:
+    case IR::Opcode::ConvertU16F32:
+    case IR::Opcode::ConvertU16F64:
+        info.uses_int16 = true;
+        break;
+    default:
+        break;
+    }
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetCbufU64:
+    case IR::Opcode::UndefU64:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+    case IR::Opcode::SelectU64:
+    case IR::Opcode::BitCastU64F64:
+    case IR::Opcode::BitCastF64U64:
+    case IR::Opcode::PackUint2x32:
+    case IR::Opcode::UnpackUint2x32:
+    case IR::Opcode::IAdd64:
+    case IR::Opcode::ISub64:
+    case IR::Opcode::INeg64:
+    case IR::Opcode::ShiftLeftLogical64:
+    case IR::Opcode::ShiftRightLogical64:
+    case IR::Opcode::ShiftRightArithmetic64:
+    case IR::Opcode::ConvertS64F16:
+    case IR::Opcode::ConvertS64F32:
+    case IR::Opcode::ConvertS64F64:
+    case IR::Opcode::ConvertU64F16:
+    case IR::Opcode::ConvertU64F32:
+    case IR::Opcode::ConvertU64F64:
+    case IR::Opcode::ConvertU64U32:
+    case IR::Opcode::ConvertU32U64:
+    case IR::Opcode::ConvertF16U64:
+    case IR::Opcode::ConvertF32U64:
+    case IR::Opcode::ConvertF64U64:
+        info.uses_int64 = true;
+        break;
+    default:
+        break;
+    }
+    switch (inst.Opcode()) {
+    case IR::Opcode::WorkgroupId:
+        info.uses_workgroup_id = true;
+        break;
+    case IR::Opcode::LocalInvocationId:
+        info.uses_local_invocation_id = true;
+        break;
+    case IR::Opcode::GetCbufU8:
+    case IR::Opcode::GetCbufS8:
+    case IR::Opcode::GetCbufU16:
+    case IR::Opcode::GetCbufS16:
+    case IR::Opcode::GetCbufU32:
+    case IR::Opcode::GetCbufF32:
+    case IR::Opcode::GetCbufU64: {
         if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) {
             AddConstantBufferDescriptor(info, index.U32(), 1);
         } else {
             throw NotImplementedException("Constant buffer with non-immediate index");
         }
+        switch (inst.Opcode()) {
+        case IR::Opcode::GetCbufU8:
+        case IR::Opcode::GetCbufS8:
+            info.used_constant_buffer_types |= IR::Type::U8;
+            break;
+        case IR::Opcode::GetCbufU16:
+        case IR::Opcode::GetCbufS16:
+            info.used_constant_buffer_types |= IR::Type::U16;
+            break;
+        case IR::Opcode::GetCbufU32:
+            info.used_constant_buffer_types |= IR::Type::U32;
+            break;
+        case IR::Opcode::GetCbufF32:
+            info.used_constant_buffer_types |= IR::Type::F32;
+            break;
+        case IR::Opcode::GetCbufU64:
+            info.used_constant_buffer_types |= IR::Type::U64;
+            break;
+        default:
+            break;
+        }
         break;
+    }
     case IR::Opcode::BindlessImageSampleImplicitLod:
     case IR::Opcode::BindlessImageSampleExplicitLod:
     case IR::Opcode::BindlessImageSampleDrefImplicitLod:
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index ae3d5a7d6d..7ba9ebe9bb 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -193,7 +193,7 @@ void FoldISub32(IR::Inst& inst) {
     // ISub32 is generally used to subtract two constant buffers, compare and replace this with
     // zero if they equal.
     const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
-        return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf &&
+        return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 &&
                a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
     }};
     IR::Inst* op_a{inst.Arg(0).InstRecursive()};
@@ -207,7 +207,7 @@ void FoldISub32(IR::Inst& inst) {
         // Canonicalize local variables to simplify the following logic
         std::swap(op_a, op_b);
     }
-    if (op_b->Opcode() != IR::Opcode::GetCbuf) {
+    if (op_b->Opcode() != IR::Opcode::GetCbufU32) {
         return;
     }
     IR::Inst* const inst_cbuf{op_b};
@@ -277,7 +277,7 @@ void FoldLogicalNot(IR::Inst& inst) {
     }
 }
 
-template <typename Dest, typename Source>
+template <IR::Opcode op, typename Dest, typename Source>
 void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
     const IR::Value value{inst.Arg(0)};
     if (value.IsImmediate()) {
@@ -285,8 +285,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
         return;
     }
     IR::Inst* const arg_inst{value.InstRecursive()};
-    if (value.InstRecursive()->Opcode() == reverse) {
+    if (arg_inst->Opcode() == reverse) {
         inst.ReplaceUsesWith(arg_inst->Arg(0));
+        return;
+    }
+    if constexpr (op == IR::Opcode::BitCastF32U32) {
+        if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) {
+            // Replace the bitcast with a typed constant buffer read
+            inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
+            inst.SetArg(0, arg_inst->Arg(0));
+            inst.SetArg(1, arg_inst->Arg(1));
+            return;
+        }
     }
 }
 
@@ -325,9 +335,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
     case IR::Opcode::ISub32:
         return FoldISub32(inst);
     case IR::Opcode::BitCastF32U32:
-        return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32);
+        return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
     case IR::Opcode::BitCastU32F32:
-        return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32);
+        return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
     case IR::Opcode::IAdd64:
         return FoldAdd<u64>(block, inst);
     case IR::Opcode::SelectU32:
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 2625c0bb2b..5d98d278e2 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -203,7 +203,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
         return std::nullopt;
     }
     const IR::Inst* const inst{value.InstRecursive()};
-    if (inst->Opcode() == IR::Opcode::GetCbuf) {
+    if (inst->Opcode() == IR::Opcode::GetCbufU32) {
         const IR::Value index{inst->Arg(0)};
         const IR::Value offset{inst->Arg(1)};
         if (!index.IsImmediate()) {
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 80e4ad6a97..ec802e02c5 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -78,7 +78,7 @@ std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value,
         return std::nullopt;
     }
     const IR::Inst* const inst{value.InstRecursive()};
-    if (inst->Opcode() == IR::Opcode::GetCbuf) {
+    if (inst->Opcode() == IR::Opcode::GetCbufU32) {
         const IR::Value index{inst->Arg(0)};
         const IR::Value offset{inst->Arg(1)};
         if (!index.IsImmediate()) {
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 103a2f0b43..adc1d9a64a 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -7,6 +7,7 @@
 #include <array>
 
 #include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/type.h"
 
 #include <boost/container/small_vector.hpp>
 #include <boost/container/static_vector.hpp>
@@ -61,10 +62,15 @@ struct Info {
     bool uses_fp16_denorms_preserve{};
     bool uses_fp32_denorms_flush{};
     bool uses_fp32_denorms_preserve{};
+    bool uses_int8{};
+    bool uses_int16{};
+    bool uses_int64{};
     bool uses_image_1d{};
     bool uses_sampled_1d{};
     bool uses_sparse_residency{};
 
+    IR::Type used_constant_buffer_types{};
+
     u32 constant_buffer_mask{};
 
     boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
-- 
cgit v1.2.3-70-g09d2


From 72990df7bad1c81d6ebc51179d34e1bfc71e0caf Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Thu, 18 Mar 2021 02:53:57 -0400
Subject: shader: Implement DADD

---
 src/shader_recompiler/CMakeLists.txt               |  1 +
 .../backend/spirv/emit_context.cpp                 |  2 +
 src/shader_recompiler/frontend/ir/value.cpp        |  8 +++
 src/shader_recompiler/frontend/ir/value.h          |  1 +
 .../frontend/maxwell/translate/impl/double_add.cpp | 67 ++++++++++++++++++++++
 .../frontend/maxwell/translate/impl/impl.cpp       | 52 ++++++++++++++++-
 .../frontend/maxwell/translate/impl/impl.h         |  3 +
 .../maxwell/translate/impl/not_implemented.cpp     | 12 ----
 8 files changed, 132 insertions(+), 14 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index e4e7749c79..b870e99378 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -64,6 +64,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/common_funcs.cpp
     frontend/maxwell/translate/impl/common_funcs.h
     frontend/maxwell/translate/impl/condition_code_set.cpp
+    frontend/maxwell/translate/impl/double_add.cpp
     frontend/maxwell/translate/impl/find_leading_one.cpp
     frontend/maxwell/translate/impl/floating_point_add.cpp
     frontend/maxwell/translate/impl/floating_point_compare.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index f848c61757..204389d749 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -94,6 +94,8 @@ Id EmitContext::Def(const IR::Value& value) {
         return Constant(U32[1], value.U32());
     case IR::Type::F32:
         return Constant(F32[1], value.F32());
+    case IR::Type::F64:
+        return Constant(F64[1], value.F64());
     default:
         throw NotImplementedException("Immediate type {}", value.Type());
     }
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 791ba26906..e8e4662e7b 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -153,6 +153,14 @@ u64 Value::U64() const {
     return imm_u64;
 }
 
+f64 Value::F64() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).F64();
+    }
+    ValidateAccess(Type::F64);
+    return imm_f64;
+}
+
 bool Value::operator==(const Value& other) const {
     if (type != other.type) {
         return false;
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 3602883d6f..b27601e704 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -52,6 +52,7 @@ public:
     [[nodiscard]] u32 U32() const;
     [[nodiscard]] f32 F32() const;
     [[nodiscard]] u64 U64() const;
+    [[nodiscard]] f64 F64() const;
 
     [[nodiscard]] bool operator==(const Value& other) const;
     [[nodiscard]] bool operator!=(const Value& other) const;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 0000000000..bece191d71
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,67 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a_reg;
+        BitField<39, 2, FpRounding> fp_rounding;
+        BitField<45, 1, u64> neg_b;
+        BitField<46, 1, u64> abs_a;
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> neg_a;
+        BitField<49, 1, u64> abs_b;
+    } const dadd{insn};
+
+    if (!IR::IsAligned(dadd.dest_reg, 2)) {
+        throw NotImplementedException("Unaligned destination register {}", dadd.dest_reg.Value());
+    }
+    if (!IR::IsAligned(dadd.src_a_reg, 2)) {
+        throw NotImplementedException("Unaligned source register {}", dadd.src_a_reg.Value());
+    }
+    if (dadd.cc != 0) {
+        throw NotImplementedException("DADD CC");
+    }
+
+    const IR::Reg reg_a{dadd.src_a_reg};
+    const IR::F64 src_a{v.ir.PackDouble2x32(v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)))};
+    const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
+    const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
+
+    IR::FpControl control{
+        .no_contraction{true},
+        .rounding{CastFpRounding(dadd.fp_rounding)},
+        .fmz_mode{IR::FmzMode::None},
+    };
+    const IR::F64 value{v.ir.FPAdd(op_a, op_b, control)};
+    const IR::Value result{v.ir.UnpackDouble2x32(value)};
+
+    for (int i = 0; i < 2; i++) {
+        v.X(dadd.dest_reg + i, IR::U32{v.ir.CompositeExtract(result, i)});
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DADD_reg(u64 insn) {
+    DADD(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DADD_cbuf(u64 insn) {
+    DADD(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DADD_imm(u64 insn) {
+    DADD(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 7564aeeb24..e444dcd4fb 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -7,6 +7,15 @@
 #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
 
 namespace Shader::Maxwell {
+namespace {
+[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
+                                    u32 offset) {
+    if (unaligned) {
+        return ir.Imm32(0);
+    }
+    return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
+}
+} // Anonymous namespace
 
 IR::U32 TranslatorVisitor::X(IR::Reg reg) {
     return ir.GetReg(reg);
@@ -56,6 +65,18 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
     return ir.BitCast<IR::F32>(GetReg39(insn));
 }
 
+IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 8, IR::Reg> src;
+    } const index{insn};
+    const IR::Reg reg{index.src};
+    if (!IR::IsAligned(reg, 2)) {
+        throw NotImplementedException("Unaligned source register {}", reg);
+    }
+    return ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)));
+}
+
 static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
     union {
         u64 raw;
@@ -75,15 +96,31 @@ static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
 }
 
 IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
-    const auto[binding, byte_offset]{CbufAddr(insn)};
+    const auto [binding, byte_offset]{CbufAddr(insn)};
     return ir.GetCbuf(binding, byte_offset);
 }
 
 IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
-    const auto[binding, byte_offset]{CbufAddr(insn)};
+    const auto [binding, byte_offset]{CbufAddr(insn)};
     return ir.GetFloatCbuf(binding, byte_offset);
 }
 
+IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 1, u64> unaligned;
+    } const cbuf{insn};
+
+    const auto [binding, offset_value]{CbufAddr(insn)};
+    const bool unaligned{cbuf.unaligned != 0};
+    const u32 offset{offset_value.U32()};
+    const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4};
+
+    const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
+    const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
+    return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
+}
+
 IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
     union {
         u64 raw;
@@ -110,6 +147,17 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
     return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
 }
 
+IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 19, u64> value;
+        BitField<56, 1, u64> is_negative;
+    } const imm{insn};
+    const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
+    const u64 value{imm.value << 44};
+    return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
+}
+
 IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
     union {
         u64 raw;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 761b646669..e3e298c3b6 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -351,12 +351,15 @@ public:
     [[nodiscard]] IR::U32 GetReg39(u64 insn);
     [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
     [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
+    [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
 
     [[nodiscard]] IR::U32 GetCbuf(u64 insn);
     [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
+    [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm20(u64 insn);
     [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
+    [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm32(u64 insn);
     [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 0325f14eaf..9675cef541 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -85,18 +85,6 @@ void TranslatorVisitor::CS2R(u64) {
     ThrowNotImplemented(Opcode::CS2R);
 }
 
-void TranslatorVisitor::DADD_reg(u64) {
-    ThrowNotImplemented(Opcode::DADD_reg);
-}
-
-void TranslatorVisitor::DADD_cbuf(u64) {
-    ThrowNotImplemented(Opcode::DADD_cbuf);
-}
-
-void TranslatorVisitor::DADD_imm(u64) {
-    ThrowNotImplemented(Opcode::DADD_imm);
-}
-
 void TranslatorVisitor::DEPBAR() {
     // DEPBAR is a no-op
 }
-- 
cgit v1.2.3-70-g09d2


From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 20 Mar 2021 05:04:12 -0300
Subject: shader: Implement I2F

---
 src/shader_recompiler/CMakeLists.txt               |   1 +
 .../backend/spirv/emit_context.cpp                 |   2 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  13 ++
 .../backend/spirv/emit_spirv_convert.cpp           |  48 ++++++
 .../backend/spirv/emit_spirv_integer.cpp           |   4 +
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 145 +++++++++++------
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  14 +-
 src/shader_recompiler/frontend/ir/opcodes.inc      |  13 ++
 .../frontend/maxwell/translate/impl/impl.cpp       |  21 +++
 .../frontend/maxwell/translate/impl/impl.h         |   2 +
 .../impl/integer_floating_point_conversion.cpp     | 173 +++++++++++++++++++++
 .../maxwell/translate/impl/not_implemented.cpp     |  12 --
 .../maxwell/translate/impl/texture_fetch.cpp       |   2 +-
 .../translate/impl/texture_fetch_swizzled.cpp      |   2 +-
 .../ir_opt/collect_shader_info_pass.cpp            |  28 ++++
 .../ir_opt/lower_fp16_to_fp32.cpp                  |  16 ++
 .../renderer_vulkan/vk_pipeline_cache.cpp          |   3 +-
 17 files changed, 429 insertions(+), 70 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 31c3941064..d0f0ec7752 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -84,6 +84,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/integer_add_three_input.cpp
     frontend/maxwell/translate/impl/integer_compare.cpp
     frontend/maxwell/translate/impl/integer_compare_and_set.cpp
+    frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
     frontend/maxwell/translate/impl/integer_funnel_shift.cpp
     frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
     frontend/maxwell/translate/impl/integer_popcount.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 6c79b611bf..6c8f16562f 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -89,6 +89,8 @@ Id EmitContext::Def(const IR::Value& value) {
         return value.U1() ? true_value : false_value;
     case IR::Type::U32:
         return Constant(U32[1], value.U32());
+    case IR::Type::U64:
+        return Constant(U64, value.U64());
     case IR::Type::F32:
         return Constant(F32[1], value.F32());
     case IR::Type::F64:
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index ae121f5344..1fe65f8a9c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -243,6 +243,7 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b);
 Id EmitINeg32(EmitContext& ctx, Id value);
 Id EmitINeg64(EmitContext& ctx, Id value);
 Id EmitIAbs32(EmitContext& ctx, Id value);
+Id EmitIAbs64(EmitContext& ctx, Id value);
 Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
@@ -302,16 +303,28 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value);
 Id EmitConvertF32F16(EmitContext& ctx, Id value);
 Id EmitConvertF32F64(EmitContext& ctx, Id value);
 Id EmitConvertF64F32(EmitContext& ctx, Id value);
+Id EmitConvertF16S8(EmitContext& ctx, Id value);
+Id EmitConvertF16S16(EmitContext& ctx, Id value);
 Id EmitConvertF16S32(EmitContext& ctx, Id value);
 Id EmitConvertF16S64(EmitContext& ctx, Id value);
+Id EmitConvertF16U8(EmitContext& ctx, Id value);
+Id EmitConvertF16U16(EmitContext& ctx, Id value);
 Id EmitConvertF16U32(EmitContext& ctx, Id value);
 Id EmitConvertF16U64(EmitContext& ctx, Id value);
+Id EmitConvertF32S8(EmitContext& ctx, Id value);
+Id EmitConvertF32S16(EmitContext& ctx, Id value);
 Id EmitConvertF32S32(EmitContext& ctx, Id value);
 Id EmitConvertF32S64(EmitContext& ctx, Id value);
+Id EmitConvertF32U8(EmitContext& ctx, Id value);
+Id EmitConvertF32U16(EmitContext& ctx, Id value);
 Id EmitConvertF32U32(EmitContext& ctx, Id value);
 Id EmitConvertF32U64(EmitContext& ctx, Id value);
+Id EmitConvertF64S8(EmitContext& ctx, Id value);
+Id EmitConvertF64S16(EmitContext& ctx, Id value);
 Id EmitConvertF64S32(EmitContext& ctx, Id value);
 Id EmitConvertF64S64(EmitContext& ctx, Id value);
+Id EmitConvertF64U8(EmitContext& ctx, Id value);
+Id EmitConvertF64U16(EmitContext& ctx, Id value);
 Id EmitConvertF64U32(EmitContext& ctx, Id value);
 Id EmitConvertF64U64(EmitContext& ctx, Id value);
 Id EmitBindlessImageSampleImplicitLod(EmitContext&);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
index 2aff673aa5..757165626b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -102,6 +102,14 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) {
     return ctx.OpFConvert(ctx.F64[1], value);
 }
 
+Id EmitConvertF16S8(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16S16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F16[1], value);
+}
+
 Id EmitConvertF16S32(EmitContext& ctx, Id value) {
     return ctx.OpConvertSToF(ctx.F16[1], value);
 }
@@ -110,6 +118,14 @@ Id EmitConvertF16S64(EmitContext& ctx, Id value) {
     return ctx.OpConvertSToF(ctx.F16[1], value);
 }
 
+Id EmitConvertF16U8(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16U16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F16[1], value);
+}
+
 Id EmitConvertF16U32(EmitContext& ctx, Id value) {
     return ctx.OpConvertUToF(ctx.F16[1], value);
 }
@@ -118,6 +134,14 @@ Id EmitConvertF16U64(EmitContext& ctx, Id value) {
     return ctx.OpConvertUToF(ctx.F16[1], value);
 }
 
+Id EmitConvertF32S8(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value));
+}
+
+Id EmitConvertF32S16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value));
+}
+
 Id EmitConvertF32S32(EmitContext& ctx, Id value) {
     return ctx.OpConvertSToF(ctx.F32[1], value);
 }
@@ -126,6 +150,14 @@ Id EmitConvertF32S64(EmitContext& ctx, Id value) {
     return ctx.OpConvertSToF(ctx.F32[1], value);
 }
 
+Id EmitConvertF32U8(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value));
+}
+
+Id EmitConvertF32U16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value));
+}
+
 Id EmitConvertF32U32(EmitContext& ctx, Id value) {
     return ctx.OpConvertUToF(ctx.F32[1], value);
 }
@@ -134,6 +166,14 @@ Id EmitConvertF32U64(EmitContext& ctx, Id value) {
     return ctx.OpConvertUToF(ctx.F32[1], value);
 }
 
+Id EmitConvertF64S8(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value));
+}
+
+Id EmitConvertF64S16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value));
+}
+
 Id EmitConvertF64S32(EmitContext& ctx, Id value) {
     return ctx.OpConvertSToF(ctx.F64[1], value);
 }
@@ -142,6 +182,14 @@ Id EmitConvertF64S64(EmitContext& ctx, Id value) {
     return ctx.OpConvertSToF(ctx.F64[1], value);
 }
 
+Id EmitConvertF64U8(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value));
+}
+
+Id EmitConvertF64U16(EmitContext& ctx, Id value) {
+    return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value));
+}
+
 Id EmitConvertF64U32(EmitContext& ctx, Id value) {
     return ctx.OpConvertUToF(ctx.F64[1], value);
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index c9de204b01..a9c5e9ccaa 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -70,6 +70,10 @@ Id EmitIAbs32(EmitContext& ctx, Id value) {
     return ctx.OpSAbs(ctx.U32[1], value);
 }
 
+Id EmitIAbs64(EmitContext& ctx, Id value) {
+    return ctx.OpSAbs(ctx.U64, value);
+}
+
 Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
     return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 672836c0b7..652f6949e6 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -53,6 +53,10 @@ U64 IREmitter::Imm64(u64 value) const {
     return U64{Value{value}};
 }
 
+U64 IREmitter::Imm64(s64 value) const {
+    return U64{Value{static_cast<u64>(value)}};
+}
+
 F64 IREmitter::Imm64(f64 value) const {
     return F64{Value{value}};
 }
@@ -363,7 +367,7 @@ U1 IREmitter::GetSparseFromOp(const Value& op) {
 }
 
 F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
-    if (a.Type() != a.Type()) {
+    if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
@@ -974,8 +978,15 @@ U32U64 IREmitter::INeg(const U32U64& value) {
     }
 }
 
-U32 IREmitter::IAbs(const U32& value) {
-    return Inst<U32>(Opcode::IAbs32, value);
+U32U64 IREmitter::IAbs(const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::IAbs32, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::IAbs64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }
 
 U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
@@ -1074,8 +1085,25 @@ U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
     return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
 }
 
-U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) {
-    return Inst<U1>(Opcode::IEqual, lhs, rhs);
+U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(Opcode::IEqual, lhs, rhs);
+    case Type::U64: {
+        // Manually compare the unpacked values
+        const Value lhs_vector{UnpackUint2x32(lhs)};
+        const Value rhs_vector{UnpackUint2x32(rhs)};
+        return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)},
+                                 IR::U32{CompositeExtract(rhs_vector, 0)}),
+                          IEqual(IR::U32{CompositeExtract(lhs_vector, 1)},
+                                 IR::U32{CompositeExtract(rhs_vector, 1)}));
+    }
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
@@ -1198,79 +1226,96 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
     }
 }
 
-F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) {
-    switch (bitsize) {
+F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) {
+    switch (dest_bitsize) {
     case 16:
-        switch (value.Type()) {
-        case Type::U32:
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F16>(Opcode::ConvertF16S8, value);
+        case 16:
+            return Inst<F16>(Opcode::ConvertF16S16, value);
+        case 32:
             return Inst<F16>(Opcode::ConvertF16S32, value);
-        case Type::U64:
+        case 64:
             return Inst<F16>(Opcode::ConvertF16S64, value);
-        default:
-            ThrowInvalidType(value.Type());
         }
+        break;
     case 32:
-        switch (value.Type()) {
-        case Type::U32:
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F32>(Opcode::ConvertF32S8, value);
+        case 16:
+            return Inst<F32>(Opcode::ConvertF32S16, value);
+        case 32:
             return Inst<F32>(Opcode::ConvertF32S32, value);
-        case Type::U64:
+        case 64:
             return Inst<F32>(Opcode::ConvertF32S64, value);
-        default:
-            ThrowInvalidType(value.Type());
         }
+        break;
     case 64:
-        switch (value.Type()) {
-        case Type::U32:
-            return Inst<F16>(Opcode::ConvertF64S32, value);
-        case Type::U64:
-            return Inst<F16>(Opcode::ConvertF64S64, value);
-        default:
-            ThrowInvalidType(value.Type());
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F64>(Opcode::ConvertF64S8, value);
+        case 16:
+            return Inst<F64>(Opcode::ConvertF64S16, value);
+        case 32:
+            return Inst<F64>(Opcode::ConvertF64S32, value);
+        case 64:
+            return Inst<F64>(Opcode::ConvertF64S64, value);
         }
-    default:
-        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+        break;
     }
+    throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
 }
 
-F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) {
-    switch (bitsize) {
+F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) {
+    switch (dest_bitsize) {
     case 16:
-        switch (value.Type()) {
-        case Type::U32:
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F16>(Opcode::ConvertF16U8, value);
+        case 16:
+            return Inst<F16>(Opcode::ConvertF16U16, value);
+        case 32:
             return Inst<F16>(Opcode::ConvertF16U32, value);
-        case Type::U64:
+        case 64:
             return Inst<F16>(Opcode::ConvertF16U64, value);
-        default:
-            ThrowInvalidType(value.Type());
         }
+        break;
     case 32:
-        switch (value.Type()) {
-        case Type::U32:
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F32>(Opcode::ConvertF32U8, value);
+        case 16:
+            return Inst<F32>(Opcode::ConvertF32U16, value);
+        case 32:
             return Inst<F32>(Opcode::ConvertF32U32, value);
-        case Type::U64:
+        case 64:
             return Inst<F32>(Opcode::ConvertF32U64, value);
-        default:
-            ThrowInvalidType(value.Type());
         }
+        break;
     case 64:
-        switch (value.Type()) {
-        case Type::U32:
-            return Inst<F16>(Opcode::ConvertF64U32, value);
-        case Type::U64:
-            return Inst<F16>(Opcode::ConvertF64U64, value);
-        default:
-            ThrowInvalidType(value.Type());
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F64>(Opcode::ConvertF64U8, value);
+        case 16:
+            return Inst<F64>(Opcode::ConvertF64U16, value);
+        case 32:
+            return Inst<F64>(Opcode::ConvertF64U32, value);
+        case 64:
+            return Inst<F64>(Opcode::ConvertF64U64, value);
         }
-    default:
-        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+        break;
     }
+    throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
 }
 
-F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) {
+F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+                                 const Value& value) {
     if (is_signed) {
-        return ConvertSToF(bitsize, value);
+        return ConvertSToF(dest_bitsize, src_bitsize, value);
     } else {
-        return ConvertUToF(bitsize, value);
+        return ConvertUToF(dest_bitsize, src_bitsize, value);
     }
 }
 
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 72af5db377..8edb111547 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -29,6 +29,7 @@ public:
     [[nodiscard]] U32 Imm32(s32 value) const;
     [[nodiscard]] F32 Imm32(f32 value) const;
     [[nodiscard]] U64 Imm64(u64 value) const;
+    [[nodiscard]] U64 Imm64(s64 value) const;
     [[nodiscard]] F64 Imm64(f64 value) const;
 
     void Branch(Block* label);
@@ -170,7 +171,7 @@ public:
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
     [[nodiscard]] U32U64 INeg(const U32U64& value);
-    [[nodiscard]] U32 IAbs(const U32& value);
+    [[nodiscard]] U32U64 IAbs(const U32U64& value);
     [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
@@ -193,7 +194,7 @@ public:
     [[nodiscard]] U32 UMax(const U32& a, const U32& b);
 
     [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
-    [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs);
+    [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
     [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
     [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
     [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
@@ -207,9 +208,12 @@ public:
     [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
-    [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value);
-    [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value);
-    [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value);
+    [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize,
+                                        const Value& value);
+    [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize,
+                                        const Value& value);
+    [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+                                        const Value& value);
 
     [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
     [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 593faca528..8471db7b9b 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -247,6 +247,7 @@ OPCODE(IMul32,                                              U32,            U32,
 OPCODE(INeg32,                                              U32,            U32,                                                                            )
 OPCODE(INeg64,                                              U64,            U64,                                                                            )
 OPCODE(IAbs32,                                              U32,            U32,                                                                            )
+OPCODE(IAbs64,                                              U64,            U64,                                                                            )
 OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                                            )
 OPCODE(ShiftLeftLogical64,                                  U64,            U64,            U32,                                                            )
 OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                                            )
@@ -311,16 +312,28 @@ OPCODE(ConvertF16F32,                                       F16,            F32,
 OPCODE(ConvertF32F16,                                       F32,            F16,                                                                            )
 OPCODE(ConvertF32F64,                                       F32,            F64,                                                                            )
 OPCODE(ConvertF64F32,                                       F64,            F32,                                                                            )
+OPCODE(ConvertF16S8,                                        F16,            U32,                                                                            )
+OPCODE(ConvertF16S16,                                       F16,            U32,                                                                            )
 OPCODE(ConvertF16S32,                                       F16,            U32,                                                                            )
 OPCODE(ConvertF16S64,                                       F16,            U64,                                                                            )
+OPCODE(ConvertF16U8,                                        F16,            U32,                                                                            )
+OPCODE(ConvertF16U16,                                       F16,            U32,                                                                            )
 OPCODE(ConvertF16U32,                                       F16,            U32,                                                                            )
 OPCODE(ConvertF16U64,                                       F16,            U64,                                                                            )
+OPCODE(ConvertF32S8,                                        F32,            U32,                                                                            )
+OPCODE(ConvertF32S16,                                       F32,            U32,                                                                            )
 OPCODE(ConvertF32S32,                                       F32,            U32,                                                                            )
 OPCODE(ConvertF32S64,                                       F32,            U64,                                                                            )
+OPCODE(ConvertF32U8,                                        F32,            U32,                                                                            )
+OPCODE(ConvertF32U16,                                       F32,            U32,                                                                            )
 OPCODE(ConvertF32U32,                                       F32,            U32,                                                                            )
 OPCODE(ConvertF32U64,                                       F32,            U64,                                                                            )
+OPCODE(ConvertF64S8,                                        F64,            U32,                                                                            )
+OPCODE(ConvertF64S16,                                       F64,            U32,                                                                            )
 OPCODE(ConvertF64S32,                                       F64,            U32,                                                                            )
 OPCODE(ConvertF64S64,                                       F64,            U64,                                                                            )
+OPCODE(ConvertF64U8,                                        F64,            U32,                                                                            )
+OPCODE(ConvertF64U16,                                       F64,            U32,                                                                            )
 OPCODE(ConvertF64U32,                                       F64,            U32,                                                                            )
 OPCODE(ConvertF64U64,                                       F64,            U64,                                                                            )
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index e444dcd4fb..c9af830101 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -121,6 +121,22 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
     return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
 }
 
+IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 1, u64> unaligned;
+    } const cbuf{insn};
+
+    if (cbuf.unaligned != 0) {
+        throw NotImplementedException("Unaligned packed constant buffer read");
+    }
+    const auto [binding, lower_offset]{CbufAddr(insn)};
+    const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
+    const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
+    const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
+    return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
+}
+
 IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
     union {
         u64 raw;
@@ -158,6 +174,11 @@ IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
     return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
 }
 
+IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
+    const s64 value{GetImm20(insn).U32()};
+    return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
+}
+
 IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
     union {
         u64 raw;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index ed81d9c369..cb66cca25f 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -356,10 +356,12 @@ public:
     [[nodiscard]] IR::U32 GetCbuf(u64 insn);
     [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
     [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
+    [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm20(u64 insn);
     [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
     [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
+    [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm32(u64 insn);
     [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 0000000000..e8b5ae1d2d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,173 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+    F16 = 1,
+    F32 = 2,
+    F64 = 3,
+};
+
+enum class IntFormat : u64 {
+    U8 = 0,
+    U16 = 1,
+    U32 = 2,
+    U64 = 3,
+};
+
+union Encoding {
+    u64 raw;
+    BitField<0, 8, IR::Reg> dest_reg;
+    BitField<8, 2, FloatFormat> float_format;
+    BitField<10, 2, IntFormat> int_format;
+    BitField<13, 1, u64> is_signed;
+    BitField<39, 2, FpRounding> fp_rounding;
+    BitField<41, 2, u64> selector;
+    BitField<47, 1, u64> cc;
+    BitField<45, 1, u64> neg;
+    BitField<49, 1, u64> abs;
+};
+
+bool Is64(u64 insn) {
+    return Encoding{insn}.int_format == IntFormat::U64;
+}
+
+int BitSize(FloatFormat format) {
+    switch (format) {
+    case FloatFormat::F16:
+        return 16;
+    case FloatFormat::F32:
+        return 32;
+    case FloatFormat::F64:
+        return 64;
+    }
+    throw NotImplementedException("Invalid float format {}", format);
+}
+
+IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
+    const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
+    const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
+    const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
+    const IR::U1 is_least{v.ir.IEqual(value, least_value)};
+    return IR::U32{v.ir.Select(is_least, value, absolute)};
+}
+
+void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
+    const Encoding i2f{insn};
+    if (i2f.cc != 0) {
+        throw NotImplementedException("CC");
+    }
+    const bool is_signed{i2f.is_signed != 0};
+    int src_bitsize{};
+    switch (i2f.int_format) {
+    case IntFormat::U8:
+        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+                                   v.ir.Imm32(8), is_signed);
+        if (i2f.abs != 0) {
+            src = SmallAbs(v, src, 8);
+        }
+        src_bitsize = 8;
+        break;
+    case IntFormat::U16:
+        if (i2f.selector == 1 || i2f.selector == 3) {
+            throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
+        }
+        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+                                   v.ir.Imm32(16), is_signed);
+        if (i2f.abs != 0) {
+            src = SmallAbs(v, src, 16);
+        }
+        src_bitsize = 16;
+        break;
+    case IntFormat::U32:
+    case IntFormat::U64:
+        if (i2f.selector != 0) {
+            throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
+        }
+        if (i2f.abs != 0 && is_signed) {
+            src = v.ir.IAbs(src);
+        }
+        src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
+        break;
+    }
+    const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
+    const int dst_bitsize{BitSize(i2f.float_format)};
+    IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)};
+    if (i2f.neg != 0) {
+        if (i2f.abs != 0 || !is_signed) {
+            // We know the value is positive
+            value = v.ir.FPNeg(value);
+        } else {
+            // Only negate if the input isn't the lowest value
+            IR::U1 is_least;
+            if (src_bitsize == 64) {
+                is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
+            } else {
+                const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
+                is_least = v.ir.IEqual(src, least_value);
+            }
+            value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
+        }
+    }
+    switch (i2f.float_format) {
+    case FloatFormat::F16: {
+        const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+        v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
+        break;
+    }
+    case FloatFormat::F32:
+        v.F(i2f.dest_reg, value);
+        break;
+    case FloatFormat::F64: {
+        if (!IR::IsAligned(i2f.dest_reg, 2)) {
+            throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
+        }
+        const IR::Value vector{v.ir.UnpackDouble2x32(value)};
+        for (int i = 0; i < 2; ++i) {
+            v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)});
+        }
+        break;
+    }
+    default:
+        throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2F_reg(u64 insn) {
+    if (Is64(insn)) {
+        union {
+            u64 raw;
+            BitField<20, 8, IR::Reg> reg;
+        } const value{insn};
+        const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
+        I2F(*this, insn, ir.PackUint2x32(regs));
+    } else {
+        I2F(*this, insn, GetReg20(insn));
+    }
+}
+
+void TranslatorVisitor::I2F_cbuf(u64 insn) {
+    if (Is64(insn)) {
+        I2F(*this, insn, GetPackedCbuf(insn));
+    } else {
+        I2F(*this, insn, GetCbuf(insn));
+    }
+}
+
+void TranslatorVisitor::I2F_imm(u64 insn) {
+    if (Is64(insn)) {
+        I2F(*this, insn, GetPackedImm20(insn));
+    } else {
+        I2F(*this, insn, GetImm20(insn));
+    }
+}
+
+} // namespace Shader::Maxwell
\ No newline at end of file
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index a4367fc5a6..4078feafa8 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -241,18 +241,6 @@ void TranslatorVisitor::HSETP2_imm(u64) {
     ThrowNotImplemented(Opcode::HSETP2_imm);
 }
 
-void TranslatorVisitor::I2F_reg(u64) {
-    ThrowNotImplemented(Opcode::I2F_reg);
-}
-
-void TranslatorVisitor::I2F_cbuf(u64) {
-    ThrowNotImplemented(Opcode::I2F_cbuf);
-}
-
-void TranslatorVisitor::I2F_imm(u64) {
-    ThrowNotImplemented(Opcode::I2F_imm);
-}
-
 void TranslatorVisitor::IDE(u64) {
     ThrowNotImplemented(Opcode::IDE);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
index 0fbb87ec43..b691b4d1fd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -56,7 +56,7 @@ Shader::TextureType GetType(TextureType type, bool dc) {
 }
 
 IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
-    const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }};
+    const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
     switch (type) {
     case TextureType::_1D:
         return v.F(reg);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
index 54f0df7547..d5fda20f42 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -65,7 +65,7 @@ IR::Value Composite(TranslatorVisitor& v, Args... regs) {
 }
 
 IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
-    return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
+    return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
 }
 
 IR::Value Sample(TranslatorVisitor& v, u64 insn) {
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index fbbe286322..e72505d610 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -79,6 +79,14 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::ConvertU16F16:
     case IR::Opcode::ConvertU32F16:
     case IR::Opcode::ConvertU64F16:
+    case IR::Opcode::ConvertF16S8:
+    case IR::Opcode::ConvertF16S16:
+    case IR::Opcode::ConvertF16S32:
+    case IR::Opcode::ConvertF16S64:
+    case IR::Opcode::ConvertF16U8:
+    case IR::Opcode::ConvertF16U16:
+    case IR::Opcode::ConvertF16U32:
+    case IR::Opcode::ConvertF16U64:
     case IR::Opcode::FPAbs16:
     case IR::Opcode::FPAdd16:
     case IR::Opcode::FPCeil16:
@@ -105,6 +113,14 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPRoundEven64:
     case IR::Opcode::FPSaturate64:
     case IR::Opcode::FPTrunc64:
+    case IR::Opcode::ConvertF64S8:
+    case IR::Opcode::ConvertF64S16:
+    case IR::Opcode::ConvertF64S32:
+    case IR::Opcode::ConvertF64S64:
+    case IR::Opcode::ConvertF64U8:
+    case IR::Opcode::ConvertF64U16:
+    case IR::Opcode::ConvertF64U32:
+    case IR::Opcode::ConvertF64U64:
         info.uses_fp64 = true;
         break;
     default:
@@ -123,6 +139,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::WriteStorageU8:
     case IR::Opcode::WriteStorageS8:
     case IR::Opcode::SelectU8:
+    case IR::Opcode::ConvertF16S8:
+    case IR::Opcode::ConvertF16U8:
+    case IR::Opcode::ConvertF32S8:
+    case IR::Opcode::ConvertF32U8:
+    case IR::Opcode::ConvertF64S8:
+    case IR::Opcode::ConvertF64U8:
         info.uses_int8 = true;
         break;
     default:
@@ -149,6 +171,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::ConvertU16F16:
     case IR::Opcode::ConvertU16F32:
     case IR::Opcode::ConvertU16F64:
+    case IR::Opcode::ConvertF16S16:
+    case IR::Opcode::ConvertF16U16:
+    case IR::Opcode::ConvertF32S16:
+    case IR::Opcode::ConvertF32U16:
+    case IR::Opcode::ConvertF64S16:
+    case IR::Opcode::ConvertF64U16:
         info.uses_int16 = true;
         break;
     default:
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 74acb8bb6b..baa3d22df4 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -70,6 +70,22 @@ IR::Opcode Replace(IR::Opcode op) {
         return IR::Opcode::Identity;
     case IR::Opcode::ConvertF16F32:
         return IR::Opcode::Identity;
+    case IR::Opcode::ConvertF16S8:
+        return IR::Opcode::ConvertF32S8;
+    case IR::Opcode::ConvertF16S16:
+        return IR::Opcode::ConvertF32S16;
+    case IR::Opcode::ConvertF16S32:
+        return IR::Opcode::ConvertF32S32;
+    case IR::Opcode::ConvertF16S64:
+        return IR::Opcode::ConvertF32S64;
+    case IR::Opcode::ConvertF16U8:
+        return IR::Opcode::ConvertF32U8;
+    case IR::Opcode::ConvertF16U16:
+        return IR::Opcode::ConvertF32U16;
+    case IR::Opcode::ConvertF16U32:
+        return IR::Opcode::ConvertF32U32;
+    case IR::Opcode::ConvertF16U64:
+        return IR::Opcode::ConvertF32U64;
     default:
         return op;
     }
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index c9da2080d4..d1399a46da 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -227,6 +227,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
       update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
       buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {
     const auto& float_control{device.FloatControlProperties()};
+    const VkDriverIdKHR driver_id{device.GetDriverID()};
     profile = Shader::Profile{
         .unified_descriptor_binding = true,
         .support_float_controls = true,
@@ -242,7 +243,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
             float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
         .support_fp32_signed_zero_nan_preserve =
             float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
-        .has_broken_spirv_clamp = true, // TODO: is_intel
+        .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
     };
 }
 
-- 
cgit v1.2.3-70-g09d2


From 112b8f00f0da0e031bb62a7a7a44469d3a5518a6 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 21 Mar 2021 01:32:02 -0400
Subject: shader: Add FP64 register load/store helpers

---
 .../frontend/maxwell/translate/impl/double_add.cpp | 16 ++-----------
 .../frontend/maxwell/translate/impl/impl.cpp       | 27 ++++++++++++++++------
 .../frontend/maxwell/translate/impl/impl.h         |  2 ++
 3 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
index bece191d71..3db09d0c28 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -22,19 +22,11 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
         BitField<48, 1, u64> neg_a;
         BitField<49, 1, u64> abs_b;
     } const dadd{insn};
-
-    if (!IR::IsAligned(dadd.dest_reg, 2)) {
-        throw NotImplementedException("Unaligned destination register {}", dadd.dest_reg.Value());
-    }
-    if (!IR::IsAligned(dadd.src_a_reg, 2)) {
-        throw NotImplementedException("Unaligned destination register {}", dadd.src_a_reg.Value());
-    }
     if (dadd.cc != 0) {
         throw NotImplementedException("DADD CC");
     }
 
-    const IR::Reg reg_a{dadd.src_a_reg};
-    const IR::F64 src_a{v.ir.PackDouble2x32(v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)))};
+    const IR::F64 src_a{v.D(dadd.src_a_reg)};
     const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
     const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
 
@@ -43,12 +35,8 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
         .rounding{CastFpRounding(dadd.fp_rounding)},
         .fmz_mode{IR::FmzMode::None},
     };
-    const IR::F64 value{v.ir.FPAdd(op_a, op_b, control)};
-    const IR::Value result{v.ir.UnpackDouble2x32(value)};
 
-    for (int i = 0; i < 2; i++) {
-        v.X(dadd.dest_reg + i, IR::U32{v.ir.CompositeExtract(result, i)});
-    }
+    v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
 }
 } // Anonymous namespace
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index c9af830101..2d2f6f9c65 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -25,6 +25,13 @@ IR::F32 TranslatorVisitor::F(IR::Reg reg) {
     return ir.BitCast<IR::F32>(X(reg));
 }
 
+IR::F64 TranslatorVisitor::D(IR::Reg reg) {
+    if (!IR::IsAligned(reg, 2)) {
+        throw NotImplementedException("Unaligned source register {}", reg);
+    }
+    return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
 void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
     ir.SetReg(dest_reg, value);
 }
@@ -33,6 +40,16 @@ void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
     X(dest_reg, ir.BitCast<IR::U32>(value));
 }
 
+void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
+    if (!IR::IsAligned(dest_reg, 2)) {
+        throw NotImplementedException("Unaligned destination register {}", dest_reg);
+    }
+    const IR::Value result{ir.UnpackDouble2x32(value)};
+    for (int i = 0; i < 2; i++) {
+        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
+    }
+}
+
 IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
     union {
         u64 raw;
@@ -68,13 +85,9 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
 IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
     union {
         u64 raw;
-        BitField<20, 8, IR::Reg> src;
-    } const index{insn};
-    const IR::Reg reg{index.src};
-    if (!IR::IsAligned(reg, 2)) {
-        throw NotImplementedException("Unaligned source register {}", reg);
-    }
-    return ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)));
+        BitField<20, 8, IR::Reg> index;
+    } const reg{insn};
+    return D(reg.index);
 }
 
 static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index cb66cca25f..1a1073fa7d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -342,9 +342,11 @@ public:
 
     [[nodiscard]] IR::U32 X(IR::Reg reg);
     [[nodiscard]] IR::F32 F(IR::Reg reg);
+    [[nodiscard]] IR::F64 D(IR::Reg reg);
 
     void X(IR::Reg dest_reg, const IR::U32& value);
     void F(IR::Reg dest_reg, const IR::F32& value);
+    void D(IR::Reg dest_reg, const IR::F64& value);
 
     [[nodiscard]] IR::U32 GetReg8(u64 insn);
     [[nodiscard]] IR::U32 GetReg20(u64 insn);
-- 
cgit v1.2.3-70-g09d2


From c858b8ba97d3ff79dcff0795c1184ee356f2cd1a Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 21 Mar 2021 02:09:14 -0400
Subject: shader: Implement DMUL and DFMA

Also add a missing const on DADD
---
 src/shader_recompiler/CMakeLists.txt               |  2 +
 src/shader_recompiler/frontend/maxwell/maxwell.inc |  2 +-
 .../frontend/maxwell/translate/impl/double_add.cpp |  2 +-
 .../translate/impl/double_fused_multiply_add.cpp   | 53 ++++++++++++++++++++++
 .../maxwell/translate/impl/double_multiply.cpp     | 45 ++++++++++++++++++
 .../frontend/maxwell/translate/impl/impl.cpp       |  8 ++++
 .../frontend/maxwell/translate/impl/impl.h         |  1 +
 .../maxwell/translate/impl/not_implemented.cpp     | 28 ------------
 8 files changed, 111 insertions(+), 30 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 87654931b1..fb5d603343 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -65,6 +65,8 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/common_funcs.h
     frontend/maxwell/translate/impl/condition_code_set.cpp
     frontend/maxwell/translate/impl/double_add.cpp
+    frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
+    frontend/maxwell/translate/impl/double_multiply.cpp
     frontend/maxwell/translate/impl/exit_program.cpp
     frontend/maxwell/translate/impl/find_leading_one.cpp
     frontend/maxwell/translate/impl/floating_point_add.cpp
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
index 1b87d04fc2..1dfaeb92f6 100644
--- a/src/shader_recompiler/frontend/maxwell/maxwell.inc
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -35,7 +35,7 @@ INST(DADD_imm,     "DADD (imm)",     "0011 100- 0111 0---")
 INST(DEPBAR,       "DEPBAR",         "1111 0000 1111 0---")
 INST(DFMA_reg,     "DFMA (reg)",     "0101 1011 0111 ----")
 INST(DFMA_rc,      "DFMA (rc)",      "0101 0011 0111 ----")
-INST(DFMA_cr,      "DFMA (cr)",      "0010 1011 0111 ----")
+INST(DFMA_cr,      "DFMA (cr)",      "0100 1011 0111 ----")
 INST(DFMA_imm,     "DFMA (imm)",     "0011 011- 0111 ----")
 INST(DMNMX_reg,    "DMNMX (reg)",    "0100 1100 0101 0---")
 INST(DMNMX_cbuf,   "DMNMX (cbuf)",   "0101 1100 0101 0---")
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
index 3db09d0c28..ac1433dea7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -30,7 +30,7 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
     const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
     const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
 
-    IR::FpControl control{
+    const IR::FpControl control{
         .no_contraction{true},
         .rounding{CastFpRounding(dadd.fp_rounding)},
         .fmz_mode{IR::FmzMode::None},
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 0000000000..ff73218629
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a_reg;
+        BitField<50, 2, FpRounding> fp_rounding;
+        BitField<48, 1, u64> neg_b;
+        BitField<49, 1, u64> neg_c;
+    } const dfma{insn};
+
+    const IR::F64 src_a{v.D(dfma.src_a_reg)};
+    const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
+    const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
+
+    const IR::FpControl control{
+        .no_contraction{true},
+        .rounding{CastFpRounding(dfma.fp_rounding)},
+        .fmz_mode{IR::FmzMode::None},
+    };
+
+    v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DFMA_reg(u64 insn) {
+    DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_cr(u64 insn) {
+    DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_rc(u64 insn) {
+    DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DFMA_imm(u64 insn) {
+    DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 0000000000..3e83d1c95c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,45 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a_reg;
+        BitField<39, 2, FpRounding> fp_rounding;
+        BitField<48, 1, u64> neg;
+    } const dmul{insn};
+
+    const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
+    const IR::FpControl control{
+        .no_contraction{true},
+        .rounding{CastFpRounding(dmul.fp_rounding)},
+        .fmz_mode{IR::FmzMode::None},
+    };
+
+    v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMUL_reg(u64 insn) {
+    DMUL(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMUL_cbuf(u64 insn) {
+    DMUL(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMUL_imm(u64 insn) {
+    DMUL(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 2d2f6f9c65..758a0230a1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -90,6 +90,14 @@ IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
     return D(reg.index);
 }
 
+IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
+    union {
+        u64 raw;
+        BitField<39, 8, IR::Reg> index;
+    } const reg{insn};
+    return D(reg.index);
+}
+
 static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
     union {
         u64 raw;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 1a1073fa7d..c994fe803f 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -354,6 +354,7 @@ public:
     [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
     [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
     [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
+    [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
 
     [[nodiscard]] IR::U32 GetCbuf(u64 insn);
     [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index bd3c1f9d6a..4e069912ac 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -81,22 +81,6 @@ void TranslatorVisitor::DEPBAR() {
     // DEPBAR is a no-op
 }
 
-void TranslatorVisitor::DFMA_reg(u64) {
-    ThrowNotImplemented(Opcode::DFMA_reg);
-}
-
-void TranslatorVisitor::DFMA_rc(u64) {
-    ThrowNotImplemented(Opcode::DFMA_rc);
-}
-
-void TranslatorVisitor::DFMA_cr(u64) {
-    ThrowNotImplemented(Opcode::DFMA_cr);
-}
-
-void TranslatorVisitor::DFMA_imm(u64) {
-    ThrowNotImplemented(Opcode::DFMA_imm);
-}
-
 void TranslatorVisitor::DMNMX_reg(u64) {
     ThrowNotImplemented(Opcode::DMNMX_reg);
 }
@@ -109,18 +93,6 @@ void TranslatorVisitor::DMNMX_imm(u64) {
     ThrowNotImplemented(Opcode::DMNMX_imm);
 }
 
-void TranslatorVisitor::DMUL_reg(u64) {
-    ThrowNotImplemented(Opcode::DMUL_reg);
-}
-
-void TranslatorVisitor::DMUL_cbuf(u64) {
-    ThrowNotImplemented(Opcode::DMUL_cbuf);
-}
-
-void TranslatorVisitor::DMUL_imm(u64) {
-    ThrowNotImplemented(Opcode::DMUL_imm);
-}
-
 void TranslatorVisitor::DSET_reg(u64) {
     ThrowNotImplemented(Opcode::DSET_reg);
 }
-- 
cgit v1.2.3-70-g09d2


From 8cb9443cb99c4510e6ef26a91d09a31a8fa6281f Mon Sep 17 00:00:00 2001
From: FernandoS27 <fsahmkow27@gmail.com>
Date: Wed, 24 Mar 2021 00:02:30 +0100
Subject: shader: Fix F2I

---
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  3 +
 .../backend/spirv/emit_spirv_floating_point.cpp    | 20 ++++-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 18 +++++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  1 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |  3 +
 .../impl/floating_point_conversion_integer.cpp     | 88 ++++++++++++++++++++--
 .../frontend/maxwell/translate/impl/impl.cpp       | 17 +++++
 .../frontend/maxwell/translate/impl/impl.h         |  2 +
 .../ir_opt/collect_shader_info_pass.cpp            |  2 +
 .../ir_opt/lower_fp16_to_fp32.cpp                  |  2 +
 10 files changed, 147 insertions(+), 9 deletions(-)

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 7fefcf2f28..6d4adafc71 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -189,6 +189,9 @@ Id EmitFPSqrt(EmitContext& ctx, Id value);
 Id EmitFPSaturate16(EmitContext& ctx, Id value);
 Id EmitFPSaturate32(EmitContext& ctx, Id value);
 Id EmitFPSaturate64(EmitContext& ctx, Id value);
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
 Id EmitFPRoundEven16(EmitContext& ctx, Id value);
 Id EmitFPRoundEven32(EmitContext& ctx, Id value);
 Id EmitFPRoundEven64(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index 1fdf66cb69..24300af39a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -15,7 +15,7 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
     return op;
 }
 
-Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
+Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
     if (ctx.profile.has_broken_spirv_clamp) {
         return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
     } else {
@@ -139,19 +139,31 @@ Id EmitFPSqrt(EmitContext& ctx, Id value) {
 Id EmitFPSaturate16(EmitContext& ctx, Id value) {
     const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
     const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
-    return Saturate(ctx, ctx.F16[1], value, zero, one);
+    return Clamp(ctx, ctx.F16[1], value, zero, one);
 }
 
 Id EmitFPSaturate32(EmitContext& ctx, Id value) {
     const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})};
     const Id one{ctx.Constant(ctx.F32[1], f32{1.0})};
-    return Saturate(ctx, ctx.F32[1], value, zero, one);
+    return Clamp(ctx, ctx.F32[1], value, zero, one);
 }
 
 Id EmitFPSaturate64(EmitContext& ctx, Id value) {
     const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
     const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
-    return Saturate(ctx, ctx.F64[1], value, zero, one);
+    return Clamp(ctx, ctx.F64[1], value, zero, one);
+}
+
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+    return Clamp(ctx, ctx.F16[1], value, min_value, max_value);
+}
+
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+    return Clamp(ctx, ctx.F32[1], value, min_value, max_value);
+}
+
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+    return Clamp(ctx, ctx.F64[1], value, min_value, max_value);
 }
 
 Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index ce610799a7..6280c08f65 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -731,6 +731,24 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
     }
 }
 
+F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value,
+                             const F16F32F64& max_value) {
+    if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) {
+        throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(),
+                              max_value.Type());
+    }
+    switch (value.Type()) {
+    case Type::F16:
+        return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value);
+    case Type::F32:
+        return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
 F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
     switch (value.Type()) {
     case Type::F16:
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 39109b0ded..ebbda78a9c 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -147,6 +147,7 @@ public:
     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
     [[nodiscard]] F32 FPSqrt(const F32& value);
     [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
+    [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value);
     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
     [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 8945c7b04c..dd17212a1b 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -192,6 +192,9 @@ OPCODE(FPLog2,                                              F32,            F32,
 OPCODE(FPSaturate16,                                        F16,            F16,                                                                            )
 OPCODE(FPSaturate32,                                        F32,            F32,                                                                            )
 OPCODE(FPSaturate64,                                        F64,            F64,                                                                            )
+OPCODE(FPClamp16,                                           F16,            F16,            F16,            F16,                                            )
+OPCODE(FPClamp32,                                           F32,            F32,            F32,            F32,                                            )
+OPCODE(FPClamp64,                                           F64,            F64,            F64,            F64,                                            )
 OPCODE(FPRoundEven16,                                       F16,            F16,                                                                            )
 OPCODE(FPRoundEven32,                                       F32,            F32,                                                                            )
 OPCODE(FPRoundEven64,                                       F64,            F64,                                                                            )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
index 81175627fd..7c5a72800c 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <limits>
+
 #include "common/common_types.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/maxwell/opcodes.h"
@@ -55,6 +57,37 @@ size_t BitSize(DestFormat dest_format) {
     }
 }
 
+std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
+    if (is_signed) {
+        switch (format) {
+        case DestFormat::I16:
+            return {static_cast<f64>(std::numeric_limits<s16>::max()),
+                    static_cast<f64>(std::numeric_limits<s16>::min())};
+        case DestFormat::I32:
+            return {static_cast<f64>(std::numeric_limits<s32>::max()),
+                    static_cast<f64>(std::numeric_limits<s32>::min())};
+        case DestFormat::I64:
+            return {static_cast<f64>(std::numeric_limits<s64>::max()),
+                    static_cast<f64>(std::numeric_limits<s64>::min())};
+        default: {}
+        }
+    } else {
+        switch (format) {
+        case DestFormat::I16:
+            return {static_cast<f64>(std::numeric_limits<u16>::max()),
+                    static_cast<f64>(std::numeric_limits<u16>::min())};
+        case DestFormat::I32:
+            return {static_cast<f64>(std::numeric_limits<u32>::max()),
+                    static_cast<f64>(std::numeric_limits<u32>::min())};
+        case DestFormat::I64:
+            return {static_cast<f64>(std::numeric_limits<u64>::max()),
+                    static_cast<f64>(std::numeric_limits<u64>::min())};
+        default: {}
+        }
+    }
+    throw NotImplementedException("Invalid destination format {}", format);
+}
+
 IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
     union {
         u64 raw;
@@ -112,13 +145,58 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
     // For example converting F32 65537.0 to U16, the expected value is 0xffff,
 
     const bool is_signed{f2i.is_signed != 0};
-    const size_t bitsize{BitSize(f2i.dest_format)};
-    const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)};
+    const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
+
+    IR::F16F32F64 intermediate;
+    switch (f2i.src_format) {
+    case SrcFormat::F16: {
+        const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
+        const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
+        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+        break;
+    }
+    case SrcFormat::F32: {
+        const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
+        const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
+        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+        break;
+    }
+    case SrcFormat::F64: {
+        const IR::F64 max_val{v.ir.Imm64(max_bound)};
+        const IR::F64 min_val{v.ir.Imm64(min_bound)};
+        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+        break;
+    }
+    default:
+        throw NotImplementedException("Invalid source format {}", f2i.src_format.Value());
+    }
+
+    const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
+    IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
+
+    bool handled_special_case = false;
+    const bool special_nan_cases =
+        (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
+    if (special_nan_cases) {
+        if (f2i.dest_format == DestFormat::I32) {
+            handled_special_case = true;
+            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
+        } else if (f2i.dest_format == DestFormat::I64) {
+            handled_special_case = true;
+            result = IR::U64{
+                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)};
+        }
+    }
+    if (!handled_special_case && is_signed) {
+        if (bitsize != 64) {
+            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
+        } else {
+            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)};
+        }
+    }
 
     if (bitsize == 64) {
-        const IR::Value vector{v.ir.UnpackUint2x32(result)};
-        v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)});
-        v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)});
+        v.L(f2i.dest_reg, result);
     } else {
         v.X(f2i.dest_reg, result);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 758a0230a1..9bae89c109 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -21,6 +21,13 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) {
     return ir.GetReg(reg);
 }
 
+IR::U64 TranslatorVisitor::L(IR::Reg reg) {
+    if (!IR::IsAligned(reg, 2)) {
+        throw NotImplementedException("Unaligned source register {}", reg);
+    }
+    return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
 IR::F32 TranslatorVisitor::F(IR::Reg reg) {
     return ir.BitCast<IR::F32>(X(reg));
 }
@@ -36,6 +43,16 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
     ir.SetReg(dest_reg, value);
 }
 
+void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
+    if (!IR::IsAligned(dest_reg, 2)) {
+        throw NotImplementedException("Unaligned destination register {}", dest_reg);
+    }
+    const IR::Value result{ir.UnpackUint2x32(value)};
+    for (int i = 0; i < 2; i++) {
+        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
+    }
+}
+
 void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
     X(dest_reg, ir.BitCast<IR::U32>(value));
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index c994fe803f..54c31deb4e 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -341,10 +341,12 @@ public:
     void XMAD_imm(u64 insn);
 
     [[nodiscard]] IR::U32 X(IR::Reg reg);
+    [[nodiscard]] IR::U64 L(IR::Reg reg);
     [[nodiscard]] IR::F32 F(IR::Reg reg);
     [[nodiscard]] IR::F64 D(IR::Reg reg);
 
     void X(IR::Reg dest_reg, const IR::U32& value);
+    void L(IR::Reg dest_reg, const IR::U64& value);
     void F(IR::Reg dest_reg, const IR::F32& value);
     void D(IR::Reg dest_reg, const IR::F64& value);
 
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index db5138e4dc..32f276f3bb 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -105,6 +105,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPNeg16:
     case IR::Opcode::FPRoundEven16:
     case IR::Opcode::FPSaturate16:
+    case IR::Opcode::FPClamp16:
     case IR::Opcode::FPTrunc16:
     case IR::Opcode::FPOrdEqual16:
     case IR::Opcode::FPUnordEqual16:
@@ -148,6 +149,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPRecipSqrt64:
     case IR::Opcode::FPRoundEven64:
     case IR::Opcode::FPSaturate64:
+    case IR::Opcode::FPClamp64:
     case IR::Opcode::FPTrunc64:
     case IR::Opcode::FPOrdEqual64:
     case IR::Opcode::FPUnordEqual64:
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 0e8862f45d..0d2c91ed61 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -30,6 +30,8 @@ IR::Opcode Replace(IR::Opcode op) {
         return IR::Opcode::FPRoundEven32;
     case IR::Opcode::FPSaturate16:
         return IR::Opcode::FPSaturate32;
+    case IR::Opcode::FPClamp16:
+        return IR::Opcode::FPClamp32;
     case IR::Opcode::FPTrunc16:
         return IR::Opcode::FPTrunc32;
     case IR::Opcode::CompositeConstructF16x2:
-- 
cgit v1.2.3-70-g09d2


From 6c51f496320f698e123207c09ca61e55180a31b5 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 28 Mar 2021 22:23:45 -0400
Subject: shader: Implement FSWZADD

---
 src/shader_recompiler/CMakeLists.txt               |  1 +
 .../backend/spirv/emit_context.cpp                 |  8 ++++
 src/shader_recompiler/backend/spirv/emit_context.h |  2 +
 src/shader_recompiler/backend/spirv/emit_spirv.h   |  1 +
 .../backend/spirv/emit_spirv_warp.cpp              | 16 ++++++++
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |  3 ++
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  2 +
 src/shader_recompiler/frontend/ir/opcodes.inc      |  1 +
 .../translate/impl/floating_point_swizzled_add.cpp | 44 ++++++++++++++++++++++
 .../frontend/maxwell/translate/impl/impl.cpp       |  4 ++
 .../frontend/maxwell/translate/impl/impl.h         |  1 +
 .../maxwell/translate/impl/not_implemented.cpp     |  4 --
 .../ir_opt/collect_shader_info_pass.cpp            |  3 ++
 src/shader_recompiler/shader_info.h                |  1 +
 14 files changed, 87 insertions(+), 4 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 44ab929b79..5ce420cbf1 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -89,6 +89,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/floating_point_multiply.cpp
     frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
     frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
+    frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
     frontend/maxwell/translate/impl/half_floating_point_add.cpp
     frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
     frontend/maxwell/translate/impl/half_floating_point_helper.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 96d0e9b4d2..7531f8b214 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -393,6 +393,14 @@ void EmitContext::DefineInputs(const Info& info) {
         subgroup_local_invocation_id =
             DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId);
     }
+    if (info.uses_fswzadd) {
+        const Id f32_one{Constant(F32[1], 1.0f)};
+        const Id f32_minus_one{Constant(F32[1], -1.0f)};
+        const Id f32_zero{Constant(F32[1], 0.0f)};
+        fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero);
+        fswzadd_lut_b =
+            ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one);
+    }
     if (info.loads_position) {
         const bool is_fragment{stage != Stage::Fragment};
         const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 1a4e8221aa..ffac39c4f2 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -103,6 +103,8 @@ public:
     Id vertex_index{};
     Id base_vertex{};
     Id front_face{};
+    Id fswzadd_lut_a{};
+    Id fswzadd_lut_b{};
 
     Id local_memory{};
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 02648d769c..3d0c6f7ba4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -397,5 +397,6 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
                    Id segmentation_mask);
 Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                         Id segmentation_mask);
+Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 44d8a347fc..cbc5b1c961 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -132,4 +132,20 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id
     return SelectValue(ctx, in_range, value, src_thread_id);
 }
 
+Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
+    const Id three{ctx.Constant(ctx.U32[1], 3)};
+    Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
+    mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Constant(ctx.U32[1], 1));
+    mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask);
+    mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
+
+    const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)};
+    const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)};
+
+    const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)};
+    const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)};
+    return ctx.OpFAdd(ctx.F32[1], result_a, result_b);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 5524724878..505fba46a5 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1602,4 +1602,7 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons
                                 const IR::U32& seg_mask) {
     return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
 }
+F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) {
+    return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle);
+}
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 17bc32fc83..8f3325738c 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -277,6 +277,8 @@ public:
                                   const IR::U32& seg_mask);
     [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
                                        const IR::U32& clamp, const IR::U32& seg_mask);
+    [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
+                                  FpControl control = {});
 
 private:
     IR::Block::iterator insertion_point;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index fb79e3d8dc..717aa71caa 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -408,3 +408,4 @@ OPCODE(ShuffleIndex,                                        U32,            U32,
 OPCODE(ShuffleUp,                                           U32,            U32,            U32,            U32,            U32,                            )
 OPCODE(ShuffleDown,                                         U32,            U32,            U32,            U32,            U32,                            )
 OPCODE(ShuffleButterfly,                                    U32,            U32,            U32,            U32,            U32,                            )
+OPCODE(FSwizzleAdd,                                         F32,            F32,            F32,            U32,                                            )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 0000000000..e42921a216
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::FSWZADD(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<28, 8, u64> swizzle;
+        BitField<38, 1, u64> ndv;
+        BitField<39, 2, FpRounding> round;
+        BitField<44, 1, u64> ftz;
+        BitField<47, 1, u64> cc;
+    } const fswzadd{insn};
+
+    if (fswzadd.ndv != 0) {
+        throw NotImplementedException("FSWZADD NDV");
+    }
+
+    const IR::F32 src_a{GetFloatReg8(insn)};
+    const IR::F32 src_b{GetFloatReg20(insn)};
+    const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
+
+    const IR::FpControl fp_control{
+        .no_contraction{false},
+        .rounding{CastFpRounding(fswzadd.round)},
+        .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+    };
+
+    const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
+    F(fswzadd.dest_reg, result);
+
+    if (fswzadd.cc != 0) {
+        throw NotImplementedException("FSWZADD CC");
+    }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 9bae89c109..30b570ce4d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -91,6 +91,10 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
     return X(reg.index);
 }
 
+IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
+    return ir.BitCast<IR::F32>(GetReg8(insn));
+}
+
 IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
     return ir.BitCast<IR::F32>(GetReg20(insn));
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 54c31deb4e..bf7d1bae87 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -353,6 +353,7 @@ public:
     [[nodiscard]] IR::U32 GetReg8(u64 insn);
     [[nodiscard]] IR::U32 GetReg20(u64 insn);
     [[nodiscard]] IR::U32 GetReg39(u64 insn);
+    [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
     [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
     [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
     [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index a0057a4739..6a580f8319 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -89,10 +89,6 @@ void TranslatorVisitor::FCHK_imm(u64) {
     ThrowNotImplemented(Opcode::FCHK_imm);
 }
 
-void TranslatorVisitor::FSWZADD(u64) {
-    ThrowNotImplemented(Opcode::FSWZADD);
-}
-
 void TranslatorVisitor::GETCRSPTR(u64) {
     ThrowNotImplemented(Opcode::GETCRSPTR);
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index c932c307ba..81090335f2 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -389,6 +389,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::SubgroupBallot:
         info.uses_subgroup_vote = true;
         break;
+    case IR::Opcode::FSwizzleAdd:
+        info.uses_fswzadd = true;
+        break;
     default:
         break;
     }
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 9111159f36..4b4006b7f8 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -94,6 +94,7 @@ struct Info {
     bool uses_sparse_residency{};
     bool uses_demote_to_helper_invocation{};
     bool uses_subgroup_vote{};
+    bool uses_fswzadd{};
 
     IR::Type used_constant_buffer_types{};
 
-- 
cgit v1.2.3-70-g09d2


From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001
From: lat9nq <22451773+lat9nq@users.noreply.github.com>
Date: Mon, 5 Apr 2021 22:25:22 -0400
Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors

Mostly fixing unused *, implicit conversion, braced scalar init,
fpermissive, and some others.

Some Clang errors likely remain in video_core, and std::ranges is still
a pertinent issue in shader_recompiler

shader_recompiler: cmake: Force bracket depth to 1024 on Clang
Increases the maximum fold expression depth

thread_worker: Include condition_variable

Don't use list initializers in control flow

Co-authored-by: ReinUsesLisp <reinuseslisp@airmail.cc>
---
 src/common/thread_worker.h                         |   1 +
 src/shader_recompiler/CMakeLists.txt               |   2 +
 .../backend/spirv/emit_context.cpp                 |   4 +-
 src/shader_recompiler/backend/spirv/emit_spirv.cpp |  19 +--
 .../backend/spirv/emit_spirv_image.cpp             |  11 +-
 .../backend/spirv/emit_spirv_warp.cpp              |   2 +-
 src/shader_recompiler/file_environment.h           |   2 +-
 src/shader_recompiler/frontend/ir/attribute.cpp    |   4 +-
 src/shader_recompiler/frontend/ir/basic_block.cpp  |   2 +-
 src/shader_recompiler/frontend/ir/condition.cpp    |   6 +-
 src/shader_recompiler/frontend/ir/condition.h      |   4 +-
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   |   4 +-
 .../frontend/ir/microinstruction.cpp               |  16 +--
 .../frontend/ir/microinstruction.h                 |   4 +-
 src/shader_recompiler/frontend/ir/opcodes.cpp      |   2 +-
 src/shader_recompiler/frontend/ir/program.cpp      |   2 -
 src/shader_recompiler/frontend/ir/value.cpp        |   4 +-
 src/shader_recompiler/frontend/ir/value.h          |   2 +-
 .../frontend/maxwell/control_flow.cpp              | 140 +++++++++------------
 src/shader_recompiler/frontend/maxwell/decode.cpp  |  10 +-
 .../maxwell/indirect_branch_table_track.cpp        |  10 +-
 .../frontend/maxwell/structured_control_flow.cpp   |   3 +-
 .../frontend/maxwell/translate/impl/double_add.cpp |   6 +-
 .../translate/impl/double_fused_multiply_add.cpp   |   6 +-
 .../maxwell/translate/impl/double_multiply.cpp     |   6 +-
 .../maxwell/translate/impl/floating_point_add.cpp  |   6 +-
 .../translate/impl/floating_point_compare.cpp      |   3 +-
 .../impl/floating_point_compare_and_set.cpp        |   6 +-
 .../floating_point_conversion_floating_point.cpp   |   6 +-
 .../impl/floating_point_conversion_integer.cpp     |  11 +-
 .../impl/floating_point_fused_multiply_add.cpp     |   6 +-
 .../translate/impl/floating_point_min_max.cpp      |   6 +-
 .../translate/impl/floating_point_multiply.cpp     |   8 +-
 .../impl/floating_point_set_predicate.cpp          |   6 +-
 .../translate/impl/floating_point_swizzled_add.cpp |   6 +-
 .../translate/impl/half_floating_point_add.cpp     |  11 +-
 .../half_floating_point_fused_multiply_add.cpp     |  11 +-
 .../impl/half_floating_point_multiply.cpp          |  11 +-
 .../translate/impl/half_floating_point_set.cpp     |  11 +-
 .../impl/half_floating_point_set_predicate.cpp     |  12 +-
 .../frontend/maxwell/translate/impl/impl.cpp       |   8 +-
 .../maxwell/translate/impl/integer_add.cpp         |   1 -
 .../impl/integer_floating_point_conversion.cpp     |   4 +-
 .../maxwell/translate/impl/load_constant.cpp       |   2 +-
 .../translate/impl/load_store_local_shared.cpp     |   9 +-
 .../maxwell/translate/impl/load_store_memory.cpp   |   4 +-
 .../maxwell/translate/impl/texture_fetch.cpp       |   2 +-
 .../translate/impl/texture_fetch_swizzled.cpp      |   2 +-
 .../translate/impl/texture_gather_swizzled.cpp     |   2 +-
 .../translate/impl/texture_load_swizzled.cpp       |   2 +-
 .../maxwell/translate/impl/texture_query.cpp       |   2 +-
 .../maxwell/translate/impl/video_set_predicate.cpp |   1 -
 .../ir_opt/collect_shader_info_pass.cpp            |  20 +--
 .../ir_opt/constant_propagation_pass.cpp           |  49 ++++----
 .../global_memory_to_storage_buffer_pass.cpp       |  42 +++----
 .../ir_opt/identity_removal_pass.cpp               |   3 +-
 .../ir_opt/lower_fp16_to_fp32.cpp                  |   2 +-
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp  |   4 +-
 src/shader_recompiler/ir_opt/texture_pass.cpp      |  32 ++---
 src/shader_recompiler/ir_opt/verification_pass.cpp |   4 +-
 src/tests/common/unique_function.cpp               |   2 +
 src/video_core/CMakeLists.txt                      |   2 +-
 .../renderer_vulkan/vk_graphics_pipeline.cpp       |  21 ++--
 .../renderer_vulkan/vk_pipeline_cache.cpp          |   5 +-
 .../renderer_vulkan/vk_render_pass_cache.cpp       |   2 -
 .../renderer_vulkan/vk_texture_cache.cpp           |   2 +-
 66 files changed, 308 insertions(+), 313 deletions(-)

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h
index 0a975a869d..cd0017726f 100644
--- a/src/common/thread_worker.h
+++ b/src/common/thread_worker.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <atomic>
+#include <condition_variable>
 #include <functional>
 #include <mutex>
 #include <stop_token>
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 22639fe132..551bf1c582 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -196,6 +196,8 @@ else()
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
         -Werror=unused-variable
+
+        $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024>
     )
 endif()
 
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index b738e00cc2..0c114402b4 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 #include <array>
+#include <climits>
 #include <string_view>
 
 #include <fmt/format.h>
@@ -116,7 +117,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
         const std::string_view def_name_view(
             def_name.data(),
             fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
-        defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
+        defs[static_cast<size_t>(i)] =
+            sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
     }
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 32512a0e5f..355cf0ca8a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -16,7 +16,7 @@
 namespace Shader::Backend::SPIRV {
 namespace {
 template <class Func>
-struct FuncTraits : FuncTraits<Func> {};
+struct FuncTraits {};
 
 template <class ReturnType_, class... Args>
 struct FuncTraits<ReturnType_ (*)(Args...)> {
@@ -64,17 +64,20 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
 template <auto func, bool is_first_arg_inst, size_t... I>
 void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
     using Traits = FuncTraits<decltype(func)>;
-    if constexpr (std::is_same_v<Traits::ReturnType, Id>) {
+    if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
         if constexpr (is_first_arg_inst) {
-            SetDefinition<func>(ctx, inst, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
+            SetDefinition<func>(
+                ctx, inst, inst,
+                Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
         } else {
-            SetDefinition<func>(ctx, inst, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
+            SetDefinition<func>(
+                ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
         }
     } else {
         if constexpr (is_first_arg_inst) {
-            func(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...);
+            func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
         } else {
-            func(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...);
+            func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
         }
     }
 }
@@ -94,14 +97,14 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitInst(EmitContext& ctx, IR::Inst* inst) {
-    switch (inst->Opcode()) {
+    switch (inst->GetOpcode()) {
 #define OPCODE(name, result_type, ...)                                                             \
     case IR::Opcode::name:                                                                         \
         return Invoke<&Emit##name>(ctx, inst);
 #include "shader_recompiler/frontend/ir/opcodes.inc"
 #undef OPCODE
     }
-    throw LogicError("Invalid opcode {}", inst->Opcode());
+    throw LogicError("Invalid opcode {}", inst->GetOpcode());
 }
 
 Id TypeId(const EmitContext& ctx, IR::Type type) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index f0f8db8c37..815ca62992 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -43,11 +43,13 @@ public:
             // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING");
             return;
         }
-        const IR::Opcode opcode{values[0]->Opcode()};
-        if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
+        const IR::Opcode opcode{values[0]->GetOpcode()};
+        if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
             throw LogicError("Invalid PTP arguments");
         }
-        auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }};
+        auto read{[&](unsigned int a, unsigned int b) {
+            return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32());
+        }};
 
         const Id offsets{
             ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)),
@@ -297,13 +299,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
 
 Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
                        const IR::Value& offset, const IR::Value& offset2, Id dref) {
-    const auto info{inst->Flags<IR::TextureInstInfo>()};
     const ImageOperands operands(ctx, offset, offset2);
     return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
                 ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
 }
 
+#ifdef _WIN32
 #pragma optimize("", off)
+#endif
 
 Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
                   Id lod, Id ms) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index c57bd291db..12a03ed6ed 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -7,7 +7,7 @@
 namespace Shader::Backend::SPIRV {
 namespace {
 Id WarpExtract(EmitContext& ctx, Id value) {
-    const Id shift{ctx.Constant(ctx.U32[1], 5)};
+    [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)};
     const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
     return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
 }
diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h
index 17640a6229..71601f8fd6 100644
--- a/src/shader_recompiler/file_environment.h
+++ b/src/shader_recompiler/file_environment.h
@@ -7,7 +7,7 @@
 
 namespace Shader {
 
-class FileEnvironment final : public Environment {
+class FileEnvironment : public Environment {
 public:
     explicit FileEnvironment(const char* path);
     ~FileEnvironment() override;
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
index 4811242ea0..7993e5c436 100644
--- a/src/shader_recompiler/frontend/ir/attribute.cpp
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) {
     if (!IsGeneric(attribute)) {
         throw InvalidArgument("Attribute is not generic {}", attribute);
     }
-    return (static_cast<int>(attribute) - static_cast<int>(Attribute::Generic0X)) / 4;
+    return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
 }
 
 std::string NameOf(Attribute attribute) {
@@ -444,4 +444,4 @@ std::string NameOf(Attribute attribute) {
     return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
 }
 
-} // namespace Shader::IR
\ No newline at end of file
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index ec029dfd6e..e1f0191f40 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
     ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd());
 
     for (const Inst& inst : block) {
-        const Opcode op{inst.Opcode()};
+        const Opcode op{inst.GetOpcode()};
         ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
         if (TypeOf(op) != Type::Void) {
             ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
index ec1659e2bc..fc18ea2a2f 100644
--- a/src/shader_recompiler/frontend/ir/condition.cpp
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -12,10 +12,10 @@ namespace Shader::IR {
 
 std::string NameOf(Condition condition) {
     std::string ret;
-    if (condition.FlowTest() != FlowTest::T) {
-        ret = fmt::to_string(condition.FlowTest());
+    if (condition.GetFlowTest() != FlowTest::T) {
+        ret = fmt::to_string(condition.GetFlowTest());
     }
-    const auto [pred, negated]{condition.Pred()};
+    const auto [pred, negated]{condition.GetPred()};
     if (!ret.empty()) {
         ret += '&';
     }
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
index 51c2f15cf5..aa8597c608 100644
--- a/src/shader_recompiler/frontend/ir/condition.h
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -30,11 +30,11 @@ public:
 
     auto operator<=>(const Condition&) const noexcept = default;
 
-    [[nodiscard]] IR::FlowTest FlowTest() const noexcept {
+    [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
         return static_cast<IR::FlowTest>(flow_test);
     }
 
-    [[nodiscard]] std::pair<IR::Pred, bool> Pred() const noexcept {
+    [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
         return {static_cast<IR::Pred>(pred), pred_negated != 0};
     }
 
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 13eb2de4c4..a2104bdb31 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
 }
 
 U1 IREmitter::Condition(IR::Condition cond) {
-    const FlowTest flow_test{cond.FlowTest()};
-    const auto [pred, is_negated]{cond.Pred()};
+    const FlowTest flow_test{cond.GetFlowTest()};
+    const auto [pred, is_negated]{cond.GetPred()};
     return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
 }
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 481202d94b..ceb44e6042 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -12,7 +12,7 @@
 namespace Shader::IR {
 namespace {
 void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
-    if (inst && inst->Opcode() != opcode) {
+    if (inst && inst->GetOpcode() != opcode) {
         throw LogicError("Invalid pseudo-instruction");
     }
 }
@@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
 }
 
 void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
-    if (inst->Opcode() != expected_opcode) {
+    if (inst->GetOpcode() != expected_opcode) {
         throw LogicError("Undoing use of invalid pseudo-op");
     }
     inst = nullptr;
 }
+
+void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
+    if (!associated_insts) {
+        associated_insts = std::make_unique<AssociatedInsts>();
+    }
+}
 } // Anonymous namespace
 
 Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
@@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) {
     op = opcode;
 }
 
-void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
-    if (!associated_insts) {
-        associated_insts = std::make_unique<AssociatedInsts>();
-    }
-}
-
 void Inst::Use(const Value& value) {
     Inst* const inst{value.Inst()};
     ++inst->use_count;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 6658dc674e..97dc91d855 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -46,7 +46,7 @@ public:
     }
 
     /// Get the opcode this microinstruction represents.
-    [[nodiscard]] IR::Opcode Opcode() const noexcept {
+    [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
         return op;
     }
 
@@ -95,7 +95,7 @@ public:
     requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
         [[nodiscard]] FlagsType Flags() const noexcept {
         FlagsType ret;
-        std::memcpy(&ret, &flags, sizeof(ret));
+        std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
         return ret;
     }
 
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
index 1cb9db6c9c..002dbf94e9 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.cpp
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -49,7 +49,7 @@ constexpr std::array META_TABLE{
 #define OPCODE(name_token, type_token, ...)                                                        \
     OpcodeMeta{                                                                                    \
         .name{#name_token},                                                                        \
-        .type{type_token},                                                                         \
+        .type = type_token,                                                                         \
         .arg_types{__VA_ARGS__},                                                                   \
     },
 #include "opcodes.inc"
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
index 5f51aeb5f3..89a17fb1b4 100644
--- a/src/shader_recompiler/frontend/ir/program.cpp
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#pragma once
-
 #include <map>
 #include <string>
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 837c1b487f..1e7ffb86d5 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
 Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
 
 bool Value::IsIdentity() const noexcept {
-    return type == Type::Opaque && inst->Opcode() == Opcode::Identity;
+    return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
 }
 
 bool Value::IsPhi() const noexcept {
-    return type == Type::Opaque && inst->Opcode() == Opcode::Phi;
+    return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
 }
 
 bool Value::IsEmpty() const noexcept {
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index b27601e704..a0962863d8 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -94,7 +94,7 @@ public:
         }
     }
 
-    explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {}
+    explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
 };
 
 using U1 = TypedValue<Type::U1>;
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index 847bb19864..cb8ec7eaa3 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -34,41 +34,37 @@ struct Compare {
 };
 
 u32 BranchOffset(Location pc, Instruction inst) {
-    return pc.Offset() + inst.branch.Offset() + 8;
+    return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
 }
 
 void Split(Block* old_block, Block* new_block, Location pc) {
     if (pc <= old_block->begin || pc >= old_block->end) {
         throw InvalidArgument("Invalid address to split={}", pc);
     }
-    *new_block = Block{
-        .begin{pc},
-        .end{old_block->end},
-        .end_class{old_block->end_class},
-        .cond{old_block->cond},
-        .stack{old_block->stack},
-        .branch_true{old_block->branch_true},
-        .branch_false{old_block->branch_false},
-        .function_call{old_block->function_call},
-        .return_block{old_block->return_block},
-        .branch_reg{old_block->branch_reg},
-        .branch_offset{old_block->branch_offset},
-        .indirect_branches{std::move(old_block->indirect_branches)},
-    };
-    *old_block = Block{
-        .begin{old_block->begin},
-        .end{pc},
-        .end_class{EndClass::Branch},
-        .cond{true},
-        .stack{std::move(old_block->stack)},
-        .branch_true{new_block},
-        .branch_false{nullptr},
-        .function_call{},
-        .return_block{},
-        .branch_reg{},
-        .branch_offset{},
-        .indirect_branches{},
-    };
+    *new_block = Block{};
+    new_block->begin = pc;
+    new_block->end = old_block->end;
+    new_block->end_class = old_block->end_class,
+    new_block->cond = old_block->cond;
+    new_block->stack = old_block->stack;
+    new_block->branch_true = old_block->branch_true;
+    new_block->branch_false = old_block->branch_false;
+    new_block->function_call = old_block->function_call;
+    new_block->return_block = old_block->return_block;
+    new_block->branch_reg = old_block->branch_reg;
+    new_block->branch_offset = old_block->branch_offset;
+    new_block->indirect_branches = std::move(old_block->indirect_branches);
+
+    const Location old_begin{old_block->begin};
+    Stack old_stack{std::move(old_block->stack)};
+    *old_block = Block{};
+    old_block->begin = old_begin;
+    old_block->end = pc;
+    old_block->end_class = EndClass::Branch;
+    old_block->cond = IR::Condition(true);
+    old_block->stack = old_stack;
+    old_block->branch_true = new_block;
+    old_block->branch_false = nullptr;
 }
 
 Token OpcodeToken(Opcode opcode) {
@@ -141,7 +137,7 @@ std::string NameOf(const Block& block) {
 
 void Stack::Push(Token token, Location target) {
     entries.push_back({
-        .token{token},
+        .token = token,
         .target{target},
     });
 }
@@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept {
 }
 
 Function::Function(ObjectPool<Block>& block_pool, Location start_address)
-    : entrypoint{start_address}, labels{{
-                                     .address{start_address},
-                                     .block{block_pool.Create(Block{
-                                         .begin{start_address},
-                                         .end{start_address},
-                                         .end_class{EndClass::Branch},
-                                         .cond{true},
-                                         .stack{},
-                                         .branch_true{nullptr},
-                                         .branch_false{nullptr},
-                                         .function_call{},
-                                         .return_block{},
-                                         .branch_reg{},
-                                         .branch_offset{},
-                                         .indirect_branches{},
-                                     })},
-                                     .stack{},
-                                 }} {}
+    : entrypoint{start_address} {
+    Label& label{labels.emplace_back()};
+    label.address = start_address;
+    label.block = block_pool.Create(Block{});
+    label.block->begin = start_address;
+    label.block->end = start_address;
+    label.block->end_class = EndClass::Branch;
+    label.block->cond = IR::Condition(true);
+    label.block->branch_true = nullptr;
+    label.block->branch_false = nullptr;
+}
 
 CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address)
     : env{env_}, block_pool{block_pool_}, program_start{start_address} {
@@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         // Insert the function into the list if it doesn't exist
         const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
         const bool exists{it != functions.end()};
-        const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()};
+        const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
+                                        : functions.size()};
         if (!exists) {
             functions.emplace_back(block_pool, cal_pc);
         }
@@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
     }
     // Create a virtual block and a conditional block
     Block* const conditional_block{block_pool.Create()};
-    Block virtual_block{
-        .begin{block->begin.Virtual()},
-        .end{block->begin.Virtual()},
-        .end_class{EndClass::Branch},
-        .cond{cond},
-        .stack{block->stack},
-        .branch_true{conditional_block},
-        .branch_false{nullptr},
-        .function_call{},
-        .return_block{},
-        .branch_reg{},
-        .branch_offset{},
-        .indirect_branches{},
-    };
+    Block virtual_block{};
+    virtual_block.begin = block->begin.Virtual();
+    virtual_block.end = block->begin.Virtual();
+    virtual_block.end_class = EndClass::Branch;
+    virtual_block.stack = block->stack;
+    virtual_block.cond = cond;
+    virtual_block.branch_true = conditional_block;
+    virtual_block.branch_false = nullptr;
     // Save the contents of the visited block in the conditional block
     *conditional_block = std::move(*block);
     // Impersonate the visited block with a virtual block
@@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst,
         if (!is_absolute) {
             target += pc.Offset();
         }
-        target += brx_table->branch_offset;
+        target += static_cast<unsigned int>(brx_table->branch_offset);
         target += 8;
         targets.push_back(target);
     }
@@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst,
     for (const u32 target : targets) {
         Block* const branch{AddLabel(block, block->stack, target, function_id)};
         block->indirect_branches.push_back({
-            .block{branch},
-            .address{target},
+            .block = branch,
+            .address = target,
         });
     }
     block->cond = IR::Condition{true};
@@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function
     if (label_it != function.labels.end()) {
         return label_it->block;
     }
-    Block* const new_block{block_pool.Create(Block{
-        .begin{pc},
-        .end{pc},
-        .end_class{EndClass::Branch},
-        .cond{true},
-        .stack{stack},
-        .branch_true{nullptr},
-        .branch_false{nullptr},
-        .function_call{},
-        .return_block{},
-        .branch_reg{},
-        .branch_offset{},
-        .indirect_branches{},
-    })};
+    Block* const new_block{block_pool.Create()};
+    new_block->begin = pc;
+    new_block->end = pc;
+    new_block->end_class = EndClass::Branch;
+    new_block->cond = IR::Condition(true);
+    new_block->stack = stack;
+    new_block->branch_true = nullptr;
+    new_block->branch_false = nullptr;
     function.labels.push_back(Label{
         .address{pc},
-        .block{new_block},
+        .block = new_block,
         .stack{std::move(stack)},
     });
     return new_block;
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
index bd85afa1e5..932d19c1d4 100644
--- a/src/shader_recompiler/frontend/maxwell/decode.cpp
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
             bit >>= 1;
         }
     }
-    return MaskValue{.mask{mask}, .value{value}};
+    return MaskValue{.mask = mask, .value = value};
 }
 
 struct InstEncoding {
@@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{
 #define INST(name, cute, encode)                                                                   \
     InstEncoding{                                                                                  \
         .mask_value{MaskValueFromEncoding(encode)},                                                \
-        .opcode{Opcode::name},                                                                     \
+        .opcode = Opcode::name,                                                                     \
     },
 #include "maxwell.inc"
 #undef INST
@@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) {
         const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
         if ((index & mask) == value) {
             encodings.at(element) = InstInfo{
-                .high_mask{static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT)},
-                .high_value{static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT)},
-                .opcode{encoding.opcode},
+                .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
+                .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
+                .opcode = encoding.opcode,
             };
             ++element;
         }
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
index 96453509d5..008625cb37 100644
--- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -97,11 +97,11 @@ std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env
     }
     const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
     return IndirectBranchTableInfo{
-        .cbuf_index{cbuf_index},
-        .cbuf_offset{cbuf_offset},
-        .num_entries{imnmx_immediate + 1},
-        .branch_offset{brx_offset},
-        .branch_reg{brx_reg},
+        .cbuf_index = cbuf_index,
+        .cbuf_offset = cbuf_offset,
+        .num_entries = imnmx_immediate + 1,
+        .branch_offset = brx_offset,
+        .branch_reg = brx_reg,
     };
 }
 
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index c804c2a8e9..02cef26455 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -558,7 +558,6 @@ private:
         const Node label{goto_stmt->label};
         const u32 label_id{label->id};
         const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)};
-        const auto type{label_nested_stmt->type};
 
         Tree loop_body;
         loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
@@ -566,7 +565,7 @@ private:
         Statement* const variable{pool.Create(Variable{}, label_id)};
         Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
         UpdateTreeUp(loop_stmt);
-        const Node loop_node{body.insert(goto_stmt, *loop_stmt)};
+        body.insert(goto_stmt, *loop_stmt);
 
         Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
         loop_stmt->children.push_front(*new_goto);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
index ac1433dea7..5a1b3a8fcb 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
     const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
 
     const IR::FpControl control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(dadd.fp_rounding)},
-        .fmz_mode{IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = CastFpRounding(dadd.fp_rounding),
+        .fmz_mode = IR::FmzMode::None,
     };
 
     v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
index ff73218629..7238414962 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s
     const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
 
     const IR::FpControl control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(dfma.fp_rounding)},
-        .fmz_mode{IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = CastFpRounding(dfma.fp_rounding),
+        .fmz_mode = IR::FmzMode::None,
     };
 
     v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
index 3e83d1c95c..4a49299a0b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
 
     const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
     const IR::FpControl control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(dmul.fp_rounding)},
-        .fmz_mode{IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = CastFpRounding(dmul.fp_rounding),
+        .fmz_mode = IR::FmzMode::None,
     };
 
     v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
index b39950c849..b8c89810cb 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin
     const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
     const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
     IR::FpControl control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(fp_rounding)},
-        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = CastFpRounding(fp_rounding),
+        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
     IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
     if (sat) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
index c02a40209e..80109ca0e5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o
     } const fcmp{insn};
 
     const IR::F32 zero{v.ir.Imm32(0.0f)};
-    const IR::F32 neg_zero{v.ir.Imm32(-0.0f)};
-    const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}};
+    const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
     const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
     const IR::U32 src_reg{v.X(fcmp.src_reg)};
     const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
index c5417775e1..b9f4ee0d9b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
     const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
     const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     IR::U1 pred{v.ir.GetPred(fset.pred)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
index 1e366fde03..035f8782a7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
 
     const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
     IR::FpControl fp_control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
     if (f2f.src_size != f2f.dst_size) {
         fp_control.rounding = CastFpRounding(f2f.rounding);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
index 21ae92be1e..cf3cf1ba69 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
         fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
     }
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{fmz_mode},
+        .no_contraction = true,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = fmz_mode,
     };
     const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
     const IR::F16F32F64 rounded_value{[&] {
@@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
         } else if (f2i.dest_format == DestFormat::I64) {
             handled_special_case = true;
             result = IR::U64{
-                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)};
+                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
         }
     }
     if (!handled_special_case && is_signed) {
         if (bitsize != 64) {
             result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
         } else {
-            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)};
+            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)};
         }
     }
 
@@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
 
 void TranslatorVisitor::F2I_reg(u64 insn) {
     union {
+        u64 raw;
         F2I base;
         BitField<20, 8, IR::Reg> src_reg;
     } const f2i{insn};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
index 18561bc9c7..fa2a7807b7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s
     const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
     const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(fp_rounding)},
-        .fmz_mode{CastFmzMode(fmz_mode)},
+        .no_contraction = true,
+        .rounding = CastFpRounding(fp_rounding),
+        .fmz_mode = CastFmzMode(fmz_mode),
     };
     IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
     if (fmz_mode == FmzMode::FMZ && !sat) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
index 343d91032b..8ae4375287 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
     const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
 
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
     IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
     IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
index 72f0a18ae8..06226b7ce2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode
     }
     const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{CastFpRounding(fp_rounding)},
-        .fmz_mode{CastFmzMode(fmz_mode)},
+        .no_contraction = true,
+        .rounding = CastFpRounding(fp_rounding),
+        .fmz_mode = CastFmzMode(fmz_mode),
     };
     IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
     if (fmz_mode == FmzMode::FMZ && !sat) {
@@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) {
          fmul32i.sat != 0, fmul32i.cc != 0, false);
 }
 
-} // namespace Shader::Maxwell
\ No newline at end of file
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
index 8ff9db8438..5f93a15130 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
     const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
     const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     const BooleanOp bop{fsetp.bop};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
index e42921a216..7550a8d4c4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) {
     const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
 
     const IR::FpControl fp_control{
-        .no_contraction{false},
-        .rounding{CastFpRounding(fswzadd.round)},
-        .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = CastFpRounding(fswzadd.round),
+        .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
index 03e7bf047d..f2738a93b2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool
     rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
 
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = true,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
     IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
     IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
@@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) {
         BitField<20, 9, u64> low;
     } const hadd2{insn};
 
-    const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{
+        static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
+        static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
     HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
 }
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
index 8b234bd6ae..fd79867016 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool
     rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
 
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{HalfPrecision2FmzMode(precision)},
+        .no_contraction = true,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = HalfPrecision2FmzMode(precision),
     };
     IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
     IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
@@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) {
         BitField<57, 2, HalfPrecision> precision;
     } const hfma2{insn};
 
-    const u32 imm{static_cast<u32>(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{
+        static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
+        static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
 
     HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
           GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
index 2451a6ef68..3f548ce761 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo
     rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
 
     const IR::FpControl fp_control{
-        .no_contraction{true},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{HalfPrecision2FmzMode(precision)},
+        .no_contraction = true,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = HalfPrecision2FmzMode(precision),
     };
     IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
     IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
@@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) {
         BitField<44, 1, u64> abs_a;
     } const hmul2{insn};
 
-    const u32 imm{static_cast<u32>(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{
+        static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
+        static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
     HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
           Swizzle::H1_H0, ir.Imm32(imm));
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
index 7f1f4b88c8..cca5b831fd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f
     rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
 
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     IR::U1 pred{v.ir.GetPred(hset2.pred)};
@@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) {
         BitField<20, 9, u64> low;
     } const hset2{insn};
 
-    const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{
+        static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
+        static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
 
     HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
           Swizzle::H1_H0);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
index 3e2a23c92d..b3931dae32 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo
     rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
 
     const IR::FpControl control{
-        .no_contraction{false},
-        .rounding{IR::FpRounding::DontCare},
-        .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
+        .no_contraction = false,
+        .rounding = IR::FpRounding::DontCare,
+        .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
     };
 
     IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
@@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) {
         BitField<20, 9, u64> low;
     } const hsetp2{insn};
 
-    const u32 imm{static_cast<u32>(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
-                  static_cast<u32>(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
+    const u32 imm{static_cast<u32>(hsetp2.low << 6) |
+                  static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
+                  static_cast<u32>(hsetp2.high << 22) |
+                  static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
 
     HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
            hsetp2.h_and != 0);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 30b570ce4d..88bbac0a50 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
     }
     const IR::Value result{ir.UnpackUint2x32(value)};
     for (int i = 0; i < 2; i++) {
-        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
+        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
     }
 }
 
@@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
     }
     const IR::Value result{ir.UnpackDouble2x32(value)};
     for (int i = 0; i < 2; i++) {
-        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
+        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
     }
 }
 
@@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
     const auto [binding, offset_value]{CbufAddr(insn)};
     const bool unaligned{cbuf.unaligned != 0};
     const u32 offset{offset_value.U32()};
-    const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4};
+    const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
 
     const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
     const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
@@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
         BitField<20, 19, u64> value;
         BitField<56, 1, u64> is_negative;
     } const imm{insn};
-    const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0};
+    const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
     const u32 value{static_cast<u32>(imm.value) << 12};
     return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
index 1493e18151..8ffd84867d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
     } const iadd{insn};
 
     const bool po{iadd.three_for_po == 3};
-    const bool neg_a{!po && iadd.neg_a != 0};
     if (!po && iadd.neg_b != 0) {
         op_b = v.ir.INeg(op_b);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
index e8b5ae1d2d..5a0fc36a03 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
         }
         const IR::Value vector{v.ir.UnpackDouble2x32(value)};
         for (int i = 0; i < 2; ++i) {
-            v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)});
+            v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
         }
         break;
     }
@@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) {
     }
 }
 
-} // namespace Shader::Maxwell
\ No newline at end of file
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
index ae3ecea325..2300088e38 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -50,7 +50,7 @@ void TranslatorVisitor::LDC(u64 insn) {
         }
         const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
         for (int i = 0; i < 2; ++i) {
-            X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
+            X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
         }
         break;
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
index 68963c8ea6..e24b497210 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -40,7 +40,6 @@ std::pair<int, bool> GetSize(u64 insn) {
         BitField<48, 3, Size> size;
     } const encoding{insn};
 
-    const Size nnn = encoding.size;
     switch (encoding.size) {
     case Size::U8:
         return {8, false};
@@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) {
     case 32:
     case 64:
     case 128:
-        if (!IR::IsAligned(dest, bit_size / 32)) {
+        if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
             throw NotImplementedException("Unaligned destination register {}", dest);
         }
         X(dest, ir.LoadLocal(word_offset));
@@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) {
         break;
     case 64:
     case 128:
-        if (!IR::IsAligned(dest, bit_size / 32)) {
+        if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
             throw NotImplementedException("Unaligned destination register {}", dest);
         }
         for (int element = 0; element < bit_size / 32; ++element) {
-            X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
+            X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
         }
         break;
     }
@@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) {
     case 32:
     case 64:
     case 128:
-        if (!IR::IsAligned(reg, bit_size / 32)) {
+        if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
             throw NotImplementedException("Unaligned source register");
         }
         ir.WriteLocal(word_offset, src);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index 71688b1d78..36c5cff2f1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) {
         }
         const IR::Value vector{ir.LoadGlobal64(address)};
         for (int i = 0; i < 2; ++i) {
-            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
+            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
         }
         break;
     }
@@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) {
         }
         const IR::Value vector{ir.LoadGlobal128(address)};
         for (int i = 0; i < 4; ++i) {
-            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
+            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
         }
         break;
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
index b2da079f9c..95d4165863 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
         if (tex.dc != 0) {
             value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
         } else {
-            value = IR::F32{v.ir.CompositeExtract(sample, element)};
+            value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
         }
         v.F(dest_reg, value);
         ++dest_reg;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
index d5fda20f42..fe2c7db85d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{
     R | G | B | A, //
 };
 
-void CheckAlignment(IR::Reg reg, int alignment) {
+void CheckAlignment(IR::Reg reg, size_t alignment) {
     if (!IR::IsAligned(reg, alignment)) {
         throw NotImplementedException("Unaligned source register {}", reg);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
index beab515ad9..2ba9c1018a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -37,7 +37,7 @@ union Encoding {
     BitField<36, 13, u64> cbuf_offset;
 };
 
-void CheckAlignment(IR::Reg reg, int alignment) {
+void CheckAlignment(IR::Reg reg, size_t alignment) {
     if (!IR::IsAligned(reg, alignment)) {
         throw NotImplementedException("Unaligned source register {}", reg);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
index 623b8fc23b..0863bdfcd4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -56,7 +56,7 @@ union Encoding {
     BitField<53, 4, u64> encoding;
 };
 
-void CheckAlignment(IR::Reg reg, int alignment) {
+void CheckAlignment(IR::Reg reg, size_t alignment) {
     if (!IR::IsAligned(reg, alignment)) {
         throw NotImplementedException("Unaligned source register {}", reg);
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
index 8c7e04bcab..0459e5473e 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
         if (((txq.mask >> element) & 1) == 0) {
             continue;
         }
-        v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)});
+        v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
         ++dest_reg;
     }
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
index af13b3fccf..ec5e74f6d8 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) {
     const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
 
     const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
-    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
     const VideoWidth a_width{vsetp.src_a_width};
     const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
 
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 1c03ee82af..edbfcd3082 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -6,6 +6,7 @@
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 #include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/ir_opt/passes.h"
 #include "shader_recompiler/shader_info.h"
 
 namespace Shader::Optimization {
@@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
     auto& cbufs{info.constant_buffer_descriptors};
     cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
                  ConstantBufferDescriptor{
-                     .index{index},
-                     .count{1},
+                     .index = index,
+                     .count = 1,
                  });
 }
 
@@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) {
 }
 
 void VisitUsages(Info& info, IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::CompositeConstructF16x2:
     case IR::Opcode::CompositeConstructF16x3:
     case IR::Opcode::CompositeConstructF16x4:
@@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     default:
         break;
     }
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::GetCbufU8:
     case IR::Opcode::GetCbufS8:
     case IR::Opcode::UndefU8:
@@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     default:
         break;
     }
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::GetCbufU16:
     case IR::Opcode::GetCbufS16:
     case IR::Opcode::UndefU16:
@@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     default:
         break;
     }
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::UndefU64:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS8:
@@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     default:
         break;
     }
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::DemoteToHelperInvocation:
         info.uses_demote_to_helper_invocation = true;
         break;
@@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
         } else {
             throw NotImplementedException("Constant buffer with non-immediate index");
         }
-        switch (inst.Opcode()) {
+        switch (inst.GetOpcode()) {
         case IR::Opcode::GetCbufU8:
         case IR::Opcode::GetCbufS8:
             info.used_constant_buffer_types |= IR::Type::U8;
@@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
 }
 
 void VisitFpModifiers(Info& info, IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::FPAdd16:
     case IR::Opcode::FPFma16:
     case IR::Opcode::FPMul16:
@@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
         info.stores_position |= header.vtg.omap_systemb.position != 0;
     }
 }
-
 } // Anonymous namespace
 
 void CollectShaderInfoPass(Environment& env, IR::Program& program) {
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 1720d7a092..61fbbe04cb 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
     }
     if (is_lhs_immediate && !is_rhs_immediate) {
         IR::Inst* const rhs_inst{rhs.InstRecursive()};
-        if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
+        if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
             const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
             inst.SetArg(0, rhs_inst->Arg(0));
             inst.SetArg(1, IR::Value{combined});
@@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
     }
     if (!is_lhs_immediate && is_rhs_immediate) {
         const IR::Inst* const lhs_inst{lhs.InstRecursive()};
-        if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
+        if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
             const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
             inst.SetArg(0, lhs_inst->Arg(0));
             inst.SetArg(1, IR::Value{combined});
@@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
         return false;
     }
     IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
-    if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) {
+    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+        lhs_shl->Arg(1) != IR::Value{16U}) {
         return false;
     }
     if (lhs_shl->Arg(0).IsImmediate()) {
@@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
     }
     IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
     IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
-    if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) {
+    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
         return false;
     }
     if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
@@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
     }
     IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
     IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
-    if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) {
+    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
         return false;
     }
-    if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) {
+    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
         return false;
     }
     if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
@@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) {
     // ISub32 is generally used to subtract two constant buffers, compare and replace this with
     // zero if they equal.
     const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
-        return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 &&
-               a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
+        return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
+               b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
+               a->Arg(1) == b->Arg(1);
     }};
     IR::Inst* op_a{inst.Arg(0).InstRecursive()};
     IR::Inst* op_b{inst.Arg(1).InstRecursive()};
@@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) {
         return;
     }
     // It's also possible a value is being added to a cbuf and then subtracted
-    if (op_b->Opcode() == IR::Opcode::IAdd32) {
+    if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
         // Canonicalize local variables to simplify the following logic
         std::swap(op_a, op_b);
     }
-    if (op_b->Opcode() != IR::Opcode::GetCbufU32) {
+    if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
         return;
     }
     IR::Inst* const inst_cbuf{op_b};
-    if (op_a->Opcode() != IR::Opcode::IAdd32) {
+    if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
         return;
     }
     IR::Value add_op_a{op_a->Arg(0)};
@@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) {
     }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
     IR::Inst* const rhs_op{rhs_value.InstRecursive()};
-    if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) {
+    if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
+        rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
         return;
     }
     const IR::Value recip_source{rhs_op->Arg(0)};
@@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) {
     }
     IR::Inst* const attr_a{recip_source.InstRecursive()};
     IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
-    if (attr_a->Opcode() != IR::Opcode::GetAttribute ||
-        attr_b->Opcode() != IR::Opcode::GetAttribute) {
+    if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
+        attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
         return;
     }
     if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
@@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) {
         return;
     }
     IR::Inst* const arg{value.InstRecursive()};
-    if (arg->Opcode() == IR::Opcode::LogicalNot) {
+    if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
         inst.ReplaceUsesWith(arg->Arg(0));
     }
 }
@@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
         return;
     }
     IR::Inst* const arg_inst{value.InstRecursive()};
-    if (arg_inst->Opcode() == reverse) {
+    if (arg_inst->GetOpcode() == reverse) {
         inst.ReplaceUsesWith(arg_inst->Arg(0));
         return;
     }
     if constexpr (op == IR::Opcode::BitCastF32U32) {
-        if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) {
+        if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
             // Replace the bitcast with a typed constant buffer read
             inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
             inst.SetArg(0, arg_inst->Arg(0));
@@ -338,7 +341,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
         return;
     }
     IR::Inst* const arg_inst{value.InstRecursive()};
-    if (arg_inst->Opcode() == reverse) {
+    if (arg_inst->GetOpcode() == reverse) {
         inst.ReplaceUsesWith(arg_inst->Arg(0));
         return;
     }
@@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
 template <typename Func, size_t... I>
 IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
     using Traits = LambdaTraits<decltype(func)>;
-    return IR::Value{func(Arg<Traits::ArgType<I>>(inst.Arg(I))...)};
+    return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
 }
 
 void FoldBranchConditional(IR::Inst& inst) {
@@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) {
         return;
     }
     const IR::Inst* cond_inst{cond.InstRecursive()};
-    if (cond_inst->Opcode() == IR::Opcode::LogicalNot) {
+    if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) {
         const IR::Value true_label{inst.Arg(1)};
         const IR::Value false_label{inst.Arg(2)};
         // Remove negation on the conditional (take the parameter out of LogicalNot) and swap
@@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) {
 std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
                                                   IR::Opcode construct, u32 first_index) {
     IR::Inst* const inst{inst_value.InstRecursive()};
-    if (inst->Opcode() == construct) {
+    if (inst->GetOpcode() == construct) {
         return inst->Arg(first_index);
     }
-    if (inst->Opcode() != insert) {
+    if (inst->GetOpcode() != insert) {
         return std::nullopt;
     }
     IR::Value value_index{inst->Arg(2)};
@@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser
 }
 
 void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::GetRegister:
         return FoldGetRegister(inst);
     case IR::Opcode::GetPred:
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 0858a0bddd..90a65dd167 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -57,7 +57,7 @@ struct StorageInfo {
 
 /// Returns true when the instruction is a global memory instruction
 bool IsGlobalMemory(const IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::LoadGlobalS8:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS16:
@@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) {
 
 /// Returns true when the instruction is a global memory instruction
 bool IsGlobalMemoryWrite(const IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::WriteGlobalS8:
     case IR::Opcode::WriteGlobalU8:
     case IR::Opcode::WriteGlobalS16:
@@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
 void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     const IR::Value zero{u32{0}};
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::LoadGlobalS8:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS16:
@@ -164,7 +164,7 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
         inst.Invalidate();
         break;
     default:
-        throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode());
+        throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode());
     }
 }
 
@@ -184,7 +184,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
     // This address is expected to either be a PackUint2x32 or a IAdd64
     IR::Inst* addr_inst{addr.InstRecursive()};
     s32 imm_offset{0};
-    if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
+    if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
         // If it's an IAdd64, get the immediate offset it is applying and grab the address
         // instruction. This expects for the instruction to be canonicalized having the address on
         // the first argument and the immediate offset on the second one.
@@ -200,7 +200,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
         addr_inst = iadd_addr.Inst();
     }
     // With IAdd64 handled, now PackUint2x32 is expected without exceptions
-    if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
+    if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) {
         return std::nullopt;
     }
     // PackUint2x32 is expected to be generated from a vector
@@ -210,20 +210,20 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
     }
     // This vector is expected to be a CompositeConstructU32x2
     IR::Inst* const vector_inst{vector.InstRecursive()};
-    if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) {
+    if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
         return std::nullopt;
     }
     // Grab the first argument from the CompositeConstructU32x2, this is the low address.
     return LowAddrInfo{
         .value{IR::U32{vector_inst->Arg(0)}},
-        .imm_offset{imm_offset},
+        .imm_offset = imm_offset,
     };
 }
 
 /// Tries to track the storage buffer address used by a global memory instruction
 std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
     const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
-        if (inst->Opcode() != IR::Opcode::GetCbufU32) {
+        if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
             return std::nullopt;
         }
         const IR::Value index{inst->Arg(0)};
@@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
     // avoid getting false positives
     static constexpr Bias nvn_bias{
-        .index{0},
-        .offset_begin{0x110},
-        .offset_end{0x610},
+        .index = 0,
+        .offset_begin = 0x110,
+        .offset_end = 0x610,
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     info.set.insert(*storage_buffer);
     info.to_replace.push_back(StorageInst{
         .storage_buffer{*storage_buffer},
-        .inst{&inst},
-        .block{&block},
+        .inst = &inst,
+        .block = &block,
     });
 }
 
@@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
 /// Replace a global memory load instruction with its storage buffer equivalent
 void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                  const IR::U32& offset) {
-    const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())};
+    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
     const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
     const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
     inst.ReplaceUsesWith(value);
@@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
 /// Replace a global memory write instruction with its storage buffer equivalent
 void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                   const IR::U32& offset) {
-    const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())};
+    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
     const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
     block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
     inst.Invalidate();
@@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
 /// Replace a global memory instruction with its storage buffer equivalent
 void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
              const IR::U32& offset) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::LoadGlobalS8:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS16:
@@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
     case IR::Opcode::WriteGlobal128:
         return ReplaceWrite(block, inst, storage_index, offset);
     default:
-        throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode());
+        throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
     }
 }
 } // Anonymous namespace
@@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
     u32 storage_index{};
     for (const StorageBufferAddr& storage_buffer : info.set) {
         program.info.storage_buffers_descriptors.push_back({
-            .cbuf_index{storage_buffer.index},
-            .cbuf_offset{storage_buffer.offset},
-            .count{1},
+            .cbuf_index = storage_buffer.index,
+            .cbuf_offset = storage_buffer.offset,
+            .count = 1,
             .is_written{info.writes.contains(storage_buffer)},
         });
         ++storage_index;
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 8790b48f21..38af72dfea 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) {
                     inst->SetArg(i, arg.Inst()->Arg(0));
                 }
             }
-            if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+            if (inst->GetOpcode() == IR::Opcode::Identity ||
+                inst->GetOpcode() == IR::Opcode::Void) {
                 to_invalidate.push_back(&*inst);
                 inst = block->Instructions().erase(inst);
             } else {
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 0d2c91ed61..52576b07fc 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) {
 void LowerFp16ToFp32(IR::Program& program) {
     for (IR::Block* const block : program.blocks) {
         for (IR::Inst& inst : block->Instructions()) {
-            inst.ReplaceOpcode(Replace(inst.Opcode()));
+            inst.ReplaceOpcode(Replace(inst.GetOpcode()));
         }
     }
 }
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index ca36253d14..346fcc3774 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
 }
 
 [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
-    return inst.Opcode() == IR::Opcode::Phi;
+    return inst.GetOpcode() == IR::Opcode::Phi;
 }
 
 enum class Status {
@@ -278,7 +278,7 @@ private:
 };
 
 void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::SetRegister:
         if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
             pass.WriteVariable(reg, block, inst.Arg(1));
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 290ce41791..c8aee3d3d5 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -30,7 +30,7 @@ struct TextureInst {
 using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
 
 IR::Opcode IndexedInstruction(const IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::BindlessImageSampleImplicitLod:
     case IR::Opcode::BoundImageSampleImplicitLod:
         return IR::Opcode::ImageSampleImplicitLod;
@@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) {
 }
 
 bool IsBindless(const IR::Inst& inst) {
-    switch (inst.Opcode()) {
+    switch (inst.GetOpcode()) {
     case IR::Opcode::BindlessImageSampleImplicitLod:
     case IR::Opcode::BindlessImageSampleExplicitLod:
     case IR::Opcode::BindlessImageSampleDrefImplicitLod:
@@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) {
     case IR::Opcode::BoundImageGradient:
         return false;
     default:
-        throw InvalidArgument("Invalid opcode {}", inst.Opcode());
+        throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
     }
 }
 
@@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) {
 }
 
 std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
-    if (inst->Opcode() != IR::Opcode::GetCbufU32) {
+    if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
         return std::nullopt;
     }
     const IR::Value index{inst->Arg(0)};
@@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
         addr = *track_addr;
     } else {
         addr = ConstBufferAddr{
-            .index{env.TextureBoundBuffer()},
-            .offset{inst.Arg(0).U32()},
+            .index = env.TextureBoundBuffer(),
+            .offset = inst.Arg(0).U32(),
         };
     }
     return TextureInst{
         .cbuf{addr},
-        .inst{&inst},
-        .block{block},
+        .inst = &inst,
+        .block = block,
     };
 }
 
@@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) {
 
         const auto& cbuf{texture_inst.cbuf};
         auto flags{inst->Flags<IR::TextureInstInfo>()};
-        switch (inst->Opcode()) {
+        switch (inst->GetOpcode()) {
         case IR::Opcode::ImageQueryDimensions:
             flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset));
             inst->SetFlags(flags);
@@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) {
         u32 index;
         if (flags.type == TextureType::Buffer) {
             index = descriptors.Add(TextureBufferDescriptor{
-                .cbuf_index{cbuf.index},
-                .cbuf_offset{cbuf.offset},
-                .count{1},
+                .cbuf_index = cbuf.index,
+                .cbuf_offset = cbuf.offset,
+                .count = 1,
             });
         } else {
             index = descriptors.Add(TextureDescriptor{
-                .type{flags.type},
-                .cbuf_index{cbuf.index},
-                .cbuf_offset{cbuf.offset},
-                .count{1},
+                .type = flags.type,
+                .cbuf_index = cbuf.index,
+                .cbuf_offset = cbuf.offset,
+                .count = 1,
             });
         }
         inst->SetArg(0, IR::Value{index});
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 4080b37cca..dbec96d84a 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -14,14 +14,14 @@ namespace Shader::Optimization {
 static void ValidateTypes(const IR::Program& program) {
     for (const auto& block : program.blocks) {
         for (const IR::Inst& inst : *block) {
-            if (inst.Opcode() == IR::Opcode::Phi) {
+            if (inst.GetOpcode() == IR::Opcode::Phi) {
                 // Skip validation on phi nodes
                 continue;
             }
             const size_t num_args{inst.NumArgs()};
             for (size_t i = 0; i < num_args; ++i) {
                 const IR::Type t1{inst.Arg(i).Type()};
-                const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
+                const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
                 if (!IR::AreTypesCompatible(t1, t2)) {
                     throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
                 }
diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp
index ac9912738a..aa6e865934 100644
--- a/src/tests/common/unique_function.cpp
+++ b/src/tests/common/unique_function.cpp
@@ -17,10 +17,12 @@ struct Noisy {
     Noisy& operator=(Noisy&& rhs) noexcept {
         state = "Move assigned";
         rhs.state = "Moved away";
+        return *this;
     }
     Noisy(const Noisy&) : state{"Copied constructed"} {}
     Noisy& operator=(const Noisy&) {
         state = "Copied assigned";
+        return *this;
     }
 
     std::string state;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 71b07c1940..3166a69dc1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -203,7 +203,7 @@ add_library(video_core STATIC
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak)
+target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak)
 
 if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
     add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 893258b4aa..57e2d569c2 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
         .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
         .pDynamicStates = dynamic_states.data(),
     };
-    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+    [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
         .pNext = nullptr,
         .requiredSubgroupSize = GuestWarpSize,
@@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
         if (!spv_modules[stage]) {
             continue;
         }
-        [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
-            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-            .pNext = nullptr,
-            .flags = 0,
-            .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
-            .module = *spv_modules[stage],
-            .pName = "main",
-            .pSpecializationInfo = nullptr,
-        });
+        [[maybe_unused]] auto& stage_ci =
+            shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
+                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
+                .module = *spv_modules[stage],
+                .pName = "main",
+                .pSpecializationInfo = nullptr,
+            });
         /*
         if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
             stage_ci.pNext = &subgroup_size_ci;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 23bf84a92f..fcebb8f6e2 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -47,7 +47,7 @@ auto MakeSpan(Container& container) {
     return std::span(container.data(), container.size());
 }
 
-u64 MakeCbufKey(u32 index, u32 offset) {
+static u64 MakeCbufKey(u32 index, u32 offset) {
     return (static_cast<u64>(index) << 32) | offset;
 }
 
@@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
         .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
         .generic_input_types{},
+        .fixed_state_point_size{},
     };
 }
 
@@ -748,7 +749,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         Shader::Environment& env{*envs[env_index]};
         ++env_index;
 
-        const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)};
+        const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
         programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
     }
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
index b2dcd74ab9..991afe521e 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#pragma once
-
 #include <unordered_map>
 
 #include <boost/container/static_vector.hpp>
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index e42b091c5f..70328680dd 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     };
 }
 
-[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
+[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
     std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
     std::vector<VkBufferCopy> result(copies.size());
     std::ranges::transform(
-- 
cgit v1.2.3-70-g09d2


From 05d41fa9b70af6d469f2f6f1474436c9255e9bc3 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 30 May 2021 23:08:17 -0300
Subject: shader: Add support for "negative" and unaligned offsets

"Negative" offsets don't exist. They are shown as such due to a bug in
nvdisasm.

Unaligned offsets have been proven to read from the aligned offset. For
example, when reading a U32 with an offset of 6, the hardware reads from
offset 4 instead.
---
 .../backend/glasm/emit_glasm_context_get_set.cpp                 | 8 ++++++++
 .../backend/spirv/emit_spirv_context_get_set.cpp                 | 9 +++------
 src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp   | 4 ++--
 3 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp')

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
index 787612def0..9ad668b864 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -18,6 +18,14 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU
         throw NotImplementedException("Indirect constant buffer loading");
     }
     const Register ret{ctx.reg_alloc.Define(inst)};
+    if (offset.type == Type::U32) {
+        // Avoid reading arrays out of bounds, matching hardware's behavior
+        const u32 imm_offset{offset.imm_u32};
+        if (offset.imm_u32 >= 0x10'000) {
+            ctx.Add("MOV.S {},0;", ret);
+            return;
+        }
+    }
     ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
 }
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index c1b69c2349..442a958a54 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -122,7 +122,7 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
 }
 
 Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
-           const IR::Value& binding, const IR::Value& offset, bool check_alignment = true) {
+           const IR::Value& binding, const IR::Value& offset) {
     if (!binding.IsImmediate()) {
         throw NotImplementedException("Constant buffer indexing");
     }
@@ -138,17 +138,14 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr,
         const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
         return ctx.OpLoad(result_type, access_chain);
     }
-    if (check_alignment && offset.U32() % element_size != 0) {
-        throw NotImplementedException("Unaligned immediate constant buffer load");
-    }
+    // Hardware has been proven to read the aligned offset (e.g. LDC.U32 at offset 6 reads offset 4)
     const Id imm_offset{ctx.Const(offset.U32() / element_size)};
     const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
     return ctx.OpLoad(result_type, access_chain);
 }
 
 Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
-    return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset,
-                   false);
+    return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset);
 }
 
 Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 88bbac0a50..b446aae0e5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -122,14 +122,14 @@ IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
 static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
     union {
         u64 raw;
-        BitField<20, 14, s64> offset;
+        BitField<20, 14, u64> offset;
         BitField<34, 5, u64> binding;
     } const cbuf{insn};
 
     if (cbuf.binding >= 18) {
         throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
     }
-    if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) {
+    if (cbuf.offset >= 0x10'000) {
         throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
     }
     const IR::Value binding{static_cast<u32>(cbuf.binding)};
-- 
cgit v1.2.3-70-g09d2