From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 8 Mar 2021 18:31:53 -0300
Subject: shader: Initial support for textures and TEX

---
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 133 ++++-
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  21 +-
 .../frontend/ir/microinstruction.cpp               |  73 ++-
 .../frontend/ir/microinstruction.h                 |  22 +-
 src/shader_recompiler/frontend/ir/modifiers.h      |  10 +
 src/shader_recompiler/frontend/ir/opcodes.cpp      |   2 +-
 src/shader_recompiler/frontend/ir/opcodes.inc      | 569 +++++++++++----------
 src/shader_recompiler/frontend/ir/reg.h            |  11 +
 src/shader_recompiler/frontend/ir/value.h          |   1 +
 src/shader_recompiler/frontend/maxwell/maxwell.inc |   4 +-
 src/shader_recompiler/frontend/maxwell/program.cpp |   1 +
 .../maxwell/translate/impl/not_implemented.cpp     |   8 -
 .../maxwell/translate/impl/texture_sample.cpp      | 232 +++++++++
 13 files changed, 772 insertions(+), 315 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp

(limited to 'src/shader_recompiler/frontend')

diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index f38b46bace..ae3354c669 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -7,11 +7,24 @@
 #include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
-
-[[noreturn]] static void ThrowInvalidType(Type type) {
+namespace {
+[[noreturn]] void ThrowInvalidType(Type type) {
     throw InvalidArgument("Invalid type {}", type);
 }
 
+Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
+    if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
+        return ir.CompositeConstruct(bias_lod, lod_clamp);
+    } else if (!bias_lod.IsEmpty()) {
+        return bias_lod;
+    } else if (!lod_clamp.IsEmpty()) {
+        return lod_clamp;
+    } else {
+        return Value{};
+    }
+}
+} // Anonymous namespace
+
 U1 IREmitter::Imm1(bool value) const {
     return U1{Value{value}};
 }
@@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
     return Inst<U1>(Opcode::GetOverflowFromOp, op);
 }
 
+U1 IREmitter::GetSparseFromOp(const Value& op) {
+    return Inst<U1>(Opcode::GetSparseFromOp, op);
+}
+
 F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
     if (a.Type() != a.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
@@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
     }
 }
 
+F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) {
+    switch (bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F16>(Opcode::ConvertF16S32, value);
+        case Type::U64:
+            return Inst<F16>(Opcode::ConvertF16S64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 32:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F32>(Opcode::ConvertF32S32, value);
+        case Type::U64:
+            return Inst<F32>(Opcode::ConvertF32S64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 64:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F16>(Opcode::ConvertF64S32, value);
+        case Type::U64:
+            return Inst<F16>(Opcode::ConvertF64S64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    default:
+        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+    }
+}
+
+F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) {
+    switch (bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F16>(Opcode::ConvertF16U32, value);
+        case Type::U64:
+            return Inst<F16>(Opcode::ConvertF16U64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 32:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F32>(Opcode::ConvertF32U32, value);
+        case Type::U64:
+            return Inst<F32>(Opcode::ConvertF32U64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 64:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<F16>(Opcode::ConvertF64U32, value);
+        case Type::U64:
+            return Inst<F16>(Opcode::ConvertF64U64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    default:
+        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+    }
+}
+
+F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) {
+    if (is_signed) {
+        return ConvertSToF(bitsize, value);
+    } else {
+        return ConvertUToF(bitsize, value);
+    }
+}
+
 U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
     switch (result_bitsize) {
     case 32:
@@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
+Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
+                                        const Value& offset, const F32& lod_clamp,
+                                        TextureInstInfo info) {
+    const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
+                                         : Opcode::BindlessImageSampleImplicitLod};
+    return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
+}
+
+Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
+                                        const Value& offset, const F32& lod_clamp,
+                                        TextureInstInfo info) {
+    const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)};
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
+                                         : Opcode::BindlessImageSampleExplicitLod};
+    return Inst(op, Flags{info}, handle, coords, lod_lc, offset);
+}
+
+F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
+                                          const F32& bias, const Value& offset,
+                                          const F32& lod_clamp, TextureInstInfo info) {
+    const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
+                                         : Opcode::BindlessImageSampleDrefImplicitLod};
+    return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
+}
+
+F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
+                                          const F32& lod, const Value& offset, const F32& lod_clamp,
+                                          TextureInstInfo info) {
+    const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)};
+    const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
+                                         : Opcode::BindlessImageSampleDrefExplicitLod};
+    return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset);
+}
+
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 6e29bf0e20..cb2a7710a1 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -91,6 +91,7 @@ public:
     [[nodiscard]] U1 GetSignFromOp(const Value& op);
     [[nodiscard]] U1 GetCarryFromOp(const Value& op);
     [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
+    [[nodiscard]] U1 GetSparseFromOp(const Value& op);
 
     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
@@ -159,7 +160,7 @@ public:
     [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
                                      const U32& count);
     [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
-                                      bool is_signed);
+                                      bool is_signed = false);
     [[nodiscard]] U32 BitReverse(const U32& value);
     [[nodiscard]] U32 BitCount(const U32& value);
     [[nodiscard]] U32 BitwiseNot(const U32& value);
@@ -186,10 +187,28 @@ public:
     [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
+    [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value);
 
     [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
     [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
 
+    [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
+                                               const F32& bias, const Value& offset,
+                                               const F32& lod_clamp, TextureInstInfo info);
+    [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
+                                               const F32& lod, const Value& offset,
+                                               const F32& lod_clamp, TextureInstInfo info);
+    [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
+                                                 const F32& dref, const F32& bias,
+                                                 const Value& offset, const F32& lod_clamp,
+                                                 TextureInstInfo info);
+    [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
+                                                 const F32& dref, const F32& lod,
+                                                 const Value& offset, const F32& lod_clamp,
+                                                 TextureInstInfo info);
+
 private:
     IR::Block::iterator insertion_point;
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index d6a9be87d5..88e186f215 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -10,26 +10,27 @@
 #include "shader_recompiler/frontend/ir/type.h"
 
 namespace Shader::IR {
-
-static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
+namespace {
+void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
     if (inst && inst->Opcode() != opcode) {
         throw LogicError("Invalid pseudo-instruction");
     }
 }
 
-static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
+void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
     if (dest_inst) {
         throw LogicError("Only one of each type of pseudo-op allowed");
     }
     dest_inst = pseudo_inst;
 }
 
-static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
+void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
     if (inst->Opcode() != expected_opcode) {
         throw LogicError("Undoing use of invalid pseudo-op");
     }
     inst = nullptr;
 }
+} // Anonymous namespace
 
 Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
     if (op == Opcode::Phi) {
@@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept {
     case Opcode::GetSignFromOp:
     case Opcode::GetCarryFromOp:
     case Opcode::GetOverflowFromOp:
+    case Opcode::GetSparseFromOp:
         return true;
     default:
         return false;
@@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const {
                        [](const IR::Value& value) { return value.IsImmediate(); });
 }
 
-bool Inst::HasAssociatedPseudoOperation() const noexcept {
-    return zero_inst || sign_inst || carry_inst || overflow_inst;
-}
-
 Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
-    // This is faster than doing a search through the block.
+    if (!associated_insts) {
+        return nullptr;
+    }
     switch (opcode) {
     case Opcode::GetZeroFromOp:
-        CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp);
-        return zero_inst;
+        CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
+        return associated_insts->zero_inst;
     case Opcode::GetSignFromOp:
-        CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp);
-        return sign_inst;
+        CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
+        return associated_insts->sign_inst;
     case Opcode::GetCarryFromOp:
-        CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp);
-        return carry_inst;
+        CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
+        return associated_insts->carry_inst;
     case Opcode::GetOverflowFromOp:
-        CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp);
-        return overflow_inst;
+        CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
+        return associated_insts->overflow_inst;
+    case Opcode::GetSparseFromOp:
+        CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
+        return associated_insts->sparse_inst;
     default:
         throw InvalidArgument("{} is not a pseudo-instruction", opcode);
     }
@@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) {
     op = opcode;
 }
 
+void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
+    if (!associated_insts) {
+        associated_insts = std::make_unique<AssociatedInsts>();
+    }
+}
+
 void Inst::Use(const Value& value) {
     Inst* const inst{value.Inst()};
     ++inst->use_count;
 
+    std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
     switch (op) {
     case Opcode::GetZeroFromOp:
-        SetPseudoInstruction(inst->zero_inst, this);
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->zero_inst, this);
         break;
     case Opcode::GetSignFromOp:
-        SetPseudoInstruction(inst->sign_inst, this);
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->sign_inst, this);
         break;
     case Opcode::GetCarryFromOp:
-        SetPseudoInstruction(inst->carry_inst, this);
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->carry_inst, this);
         break;
     case Opcode::GetOverflowFromOp:
-        SetPseudoInstruction(inst->overflow_inst, this);
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->overflow_inst, this);
+        break;
+    case Opcode::GetSparseFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->sparse_inst, this);
         break;
     default:
         break;
@@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) {
     Inst* const inst{value.Inst()};
     --inst->use_count;
 
+    std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
     switch (op) {
     case Opcode::GetZeroFromOp:
-        RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp);
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
         break;
     case Opcode::GetSignFromOp:
-        RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp);
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
         break;
     case Opcode::GetCarryFromOp:
-        RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp);
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
         break;
     case Opcode::GetOverflowFromOp:
-        RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp);
+        AllocAssociatedInsts(assoc_inst);
+        RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
         break;
     default:
         break;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 321393dd7c..d5336c4383 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -22,7 +22,7 @@ namespace Shader::IR {
 
 class Block;
 
-constexpr size_t MAX_ARG_COUNT = 4;
+struct AssociatedInsts;
 
 class Inst : public boost::intrusive::list_base_hook<> {
 public:
@@ -50,6 +50,11 @@ public:
         return op;
     }
 
+    /// Determines if there is a pseudo-operation associated with this instruction.
+    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
+        return associated_insts != nullptr;
+    }
+
     /// Determines whether or not this instruction may have side effects.
     [[nodiscard]] bool MayHaveSideEffects() const noexcept;
 
@@ -60,8 +65,6 @@ public:
     /// Determines if all arguments of this instruction are immediates.
     [[nodiscard]] bool AreAllArgsImmediates() const;
 
-    /// Determines if there is a pseudo-operation associated with this instruction.
-    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
     /// Gets a pseudo-operation associated with this instruction
     [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
 
@@ -122,14 +125,21 @@ private:
     u32 definition{};
     union {
         NonTriviallyDummy dummy{};
-        std::array<Value, MAX_ARG_COUNT> args;
         std::vector<std::pair<Block*, Value>> phi_args;
+        std::array<Value, 5> args;
+    };
+    std::unique_ptr<AssociatedInsts> associated_insts;
+};
+static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
+
+struct AssociatedInsts {
+    union {
+        Inst* sparse_inst;
+        Inst* zero_inst{};
     };
-    Inst* zero_inst{};
     Inst* sign_inst{};
     Inst* carry_inst{};
     Inst* overflow_inst{};
 };
-static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size");
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 44652eae7c..ad07700aef 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -4,7 +4,9 @@
 
 #pragma once
 
+#include "common/bit_field.h"
 #include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
 
 namespace Shader::IR {
 
@@ -30,4 +32,12 @@ struct FpControl {
 };
 static_assert(sizeof(FpControl) <= sizeof(u32));
 
+union TextureInstInfo {
+    u32 raw;
+    BitField<0, 8, TextureType> type;
+    BitField<8, 1, u32> has_bias;
+    BitField<16, 1, u32> has_lod_clamp;
+};
+static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
+
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
index 1f188411a9..8492a13d54 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.cpp
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -14,7 +14,7 @@ namespace {
 struct OpcodeMeta {
     std::string_view name;
     Type type;
-    std::array<Type, 4> arg_types;
+    std::array<Type, 5> arg_types;
 };
 
 using enum Type;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index c4e72c84d9..aa011fab1b 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -2,301 +2,330 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-//     opcode name,                                         return type,    arg1 type,      arg2 type,      arg3 type,      arg4 type,      ...
-OPCODE(Phi,                                                 Opaque,                                                                         )
-OPCODE(Identity,                                            Opaque,         Opaque,                                                         )
-OPCODE(Void,                                                Void,                                                                           )
+//     opcode name,                                         return type,    arg1 type,      arg2 type,      arg3 type,      arg4 type,      arg4 type,      ...
+OPCODE(Phi,                                                 Opaque,                                                                                         )
+OPCODE(Identity,                                            Opaque,         Opaque,                                                                         )
+OPCODE(Void,                                                Void,                                                                                           )
 
 // Control flow
-OPCODE(Branch,                                              Void,           Label,                                                          )
-OPCODE(BranchConditional,                                   Void,           U1,             Label,          Label,                          )
-OPCODE(LoopMerge,                                           Void,           Label,          Label,                                          )
-OPCODE(SelectionMerge,                                      Void,           Label,                                                          )
-OPCODE(Return,                                              Void,                                                                           )
+OPCODE(Branch,                                              Void,           Label,                                                                          )
+OPCODE(BranchConditional,                                   Void,           U1,             Label,          Label,                                          )
+OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          )
+OPCODE(SelectionMerge,                                      Void,           Label,                                                                          )
+OPCODE(Return,                                              Void,                                                                                           )
 
 // Context getters/setters
-OPCODE(GetRegister,                                         U32,            Reg,                                                            )
-OPCODE(SetRegister,                                         Void,           Reg,            U32,                                            )
-OPCODE(GetPred,                                             U1,             Pred,                                                           )
-OPCODE(SetPred,                                             Void,           Pred,           U1,                                             )
-OPCODE(GetGotoVariable,                                     U1,             U32,                                                            )
-OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                             )
-OPCODE(GetCbuf,                                             U32,            U32,            U32,                                            )
-OPCODE(GetAttribute,                                        U32,            Attribute,                                                      )
-OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                            )
-OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                            )
-OPCODE(SetAttributeIndexed,                                 Void,           U32,            U32,                                            )
-OPCODE(GetZFlag,                                            U1,             Void,                                                           )
-OPCODE(GetSFlag,                                            U1,             Void,                                                           )
-OPCODE(GetCFlag,                                            U1,             Void,                                                           )
-OPCODE(GetOFlag,                                            U1,             Void,                                                           )
-OPCODE(SetZFlag,                                            Void,           U1,                                                             )
-OPCODE(SetSFlag,                                            Void,           U1,                                                             )
-OPCODE(SetCFlag,                                            Void,           U1,                                                             )
-OPCODE(SetOFlag,                                            Void,           U1,                                                             )
-OPCODE(WorkgroupId,                                         U32x3,                                                                          )
-OPCODE(LocalInvocationId,                                   U32x3,                                                                          )
+OPCODE(GetRegister,                                         U32,            Reg,                                                                            )
+OPCODE(SetRegister,                                         Void,           Reg,            U32,                                                            )
+OPCODE(GetPred,                                             U1,             Pred,                                                                           )
+OPCODE(SetPred,                                             Void,           Pred,           U1,                                                             )
+OPCODE(GetGotoVariable,                                     U1,             U32,                                                                            )
+OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                                             )
+OPCODE(GetCbuf,                                             U32,            U32,            U32,                                                            )
+OPCODE(GetAttribute,                                        U32,            Attribute,                                                                      )
+OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                                            )
+OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                                            )
+OPCODE(SetAttributeIndexed,                                 Void,           U32,            U32,                                                            )
+OPCODE(GetZFlag,                                            U1,             Void,                                                                           )
+OPCODE(GetSFlag,                                            U1,             Void,                                                                           )
+OPCODE(GetCFlag,                                            U1,             Void,                                                                           )
+OPCODE(GetOFlag,                                            U1,             Void,                                                                           )
+OPCODE(SetZFlag,                                            Void,           U1,                                                                             )
+OPCODE(SetSFlag,                                            Void,           U1,                                                                             )
+OPCODE(SetCFlag,                                            Void,           U1,                                                                             )
+OPCODE(SetOFlag,                                            Void,           U1,                                                                             )
+OPCODE(WorkgroupId,                                         U32x3,                                                                                          )
+OPCODE(LocalInvocationId,                                   U32x3,                                                                                          )
 
 // Undefined
-OPCODE(UndefU1,                                             U1,                                                                             )
-OPCODE(UndefU8,                                             U8,                                                                             )
-OPCODE(UndefU16,                                            U16,                                                                            )
-OPCODE(UndefU32,                                            U32,                                                                            )
-OPCODE(UndefU64,                                            U64,                                                                            )
+OPCODE(UndefU1,                                             U1,                                                                                             )
+OPCODE(UndefU8,                                             U8,                                                                                             )
+OPCODE(UndefU16,                                            U16,                                                                                            )
+OPCODE(UndefU32,                                            U32,                                                                                            )
+OPCODE(UndefU64,                                            U64,                                                                                            )
 
 // Memory operations
-OPCODE(LoadGlobalU8,                                        U32,            U64,                                                            )
-OPCODE(LoadGlobalS8,                                        U32,            U64,                                                            )
-OPCODE(LoadGlobalU16,                                       U32,            U64,                                                            )
-OPCODE(LoadGlobalS16,                                       U32,            U64,                                                            )
-OPCODE(LoadGlobal32,                                        U32,            U64,                                                            )
-OPCODE(LoadGlobal64,                                        U32x2,          U64,                                                            )
-OPCODE(LoadGlobal128,                                       U32x4,          U64,                                                            )
-OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                            )
-OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                            )
-OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                            )
-OPCODE(WriteGlobalS16,                                      Void,           U64,            U32,                                            )
-OPCODE(WriteGlobal32,                                       Void,           U64,            U32,                                            )
-OPCODE(WriteGlobal64,                                       Void,           U64,            U32x2,                                          )
-OPCODE(WriteGlobal128,                                      Void,           U64,            U32x4,                                          )
+OPCODE(LoadGlobalU8,                                        U32,            U64,                                                                            )
+OPCODE(LoadGlobalS8,                                        U32,            U64,                                                                            )
+OPCODE(LoadGlobalU16,                                       U32,            U64,                                                                            )
+OPCODE(LoadGlobalS16,                                       U32,            U64,                                                                            )
+OPCODE(LoadGlobal32,                                        U32,            U64,                                                                            )
+OPCODE(LoadGlobal64,                                        U32x2,          U64,                                                                            )
+OPCODE(LoadGlobal128,                                       U32x4,          U64,                                                                            )
+OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobalS16,                                      Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobal32,                                       Void,           U64,            U32,                                                            )
+OPCODE(WriteGlobal64,                                       Void,           U64,            U32x2,                                                          )
+OPCODE(WriteGlobal128,                                      Void,           U64,            U32x4,                                                          )
 
 // Storage buffer operations
-OPCODE(LoadStorageU8,                                       U32,            U32,            U32,                                            )
-OPCODE(LoadStorageS8,                                       U32,            U32,            U32,                                            )
-OPCODE(LoadStorageU16,                                      U32,            U32,            U32,                                            )
-OPCODE(LoadStorageS16,                                      U32,            U32,            U32,                                            )
-OPCODE(LoadStorage32,                                       U32,            U32,            U32,                                            )
-OPCODE(LoadStorage64,                                       U32x2,          U32,            U32,                                            )
-OPCODE(LoadStorage128,                                      U32x4,          U32,            U32,                                            )
-OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                            )
-OPCODE(WriteStorage64,                                      Void,           U32,            U32,            U32x2,                          )
-OPCODE(WriteStorage128,                                     Void,           U32,            U32,            U32x4,                          )
+OPCODE(LoadStorageU8,                                       U32,            U32,            U32,                                                            )
+OPCODE(LoadStorageS8,                                       U32,            U32,            U32,                                                            )
+OPCODE(LoadStorageU16,                                      U32,            U32,            U32,                                                            )
+OPCODE(LoadStorageS16,                                      U32,            U32,            U32,                                                            )
+OPCODE(LoadStorage32,                                       U32,            U32,            U32,                                                            )
+OPCODE(LoadStorage64,                                       U32x2,          U32,            U32,                                                            )
+OPCODE(LoadStorage128,                                      U32x4,          U32,            U32,                                                            )
+OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                                            )
+OPCODE(WriteStorage64,                                      Void,           U32,            U32,            U32x2,                                          )
+OPCODE(WriteStorage128,                                     Void,           U32,            U32,            U32x4,                                          )
 
 // Vector utility
-OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                            )
-OPCODE(CompositeConstructU32x3,                             U32x3,          U32,            U32,            U32,                            )
-OPCODE(CompositeConstructU32x4,                             U32x4,          U32,            U32,            U32,            U32,            )
-OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                            )
-OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                            )
-OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                            )
-OPCODE(CompositeInsertU32x2,                                U32x2,          U32x2,          U32,            U32,                            )
-OPCODE(CompositeInsertU32x3,                                U32x3,          U32x3,          U32,            U32,                            )
-OPCODE(CompositeInsertU32x4,                                U32x4,          U32x4,          U32,            U32,                            )
-OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                            )
-OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                            )
-OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,            )
-OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                            )
-OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                            )
-OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                            )
-OPCODE(CompositeInsertF16x2,                                F16x2,          F16x2,          F16,            U32,                            )
-OPCODE(CompositeInsertF16x3,                                F16x3,          F16x3,          F16,            U32,                            )
-OPCODE(CompositeInsertF16x4,                                F16x4,          F16x4,          F16,            U32,                            )
-OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                            )
-OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                            )
-OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,            )
-OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                            )
-OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                            )
-OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                            )
-OPCODE(CompositeInsertF32x2,                                F32x2,          F32x2,          F32,            U32,                            )
-OPCODE(CompositeInsertF32x3,                                F32x3,          F32x3,          F32,            U32,                            )
-OPCODE(CompositeInsertF32x4,                                F32x4,          F32x4,          F32,            U32,                            )
-OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                            )
-OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                            )
-OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,            )
-OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                            )
-OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                            )
-OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                            )
-OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                            )
-OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                            )
-OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                            )
+OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                                            )
+OPCODE(CompositeConstructU32x3,                             U32x3,          U32,            U32,            U32,                                            )
+OPCODE(CompositeConstructU32x4,                             U32x4,          U32,            U32,            U32,            U32,                            )
+OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                                            )
+OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                                            )
+OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                                            )
+OPCODE(CompositeInsertU32x2,                                U32x2,          U32x2,          U32,            U32,                                            )
+OPCODE(CompositeInsertU32x3,                                U32x3,          U32x3,          U32,            U32,                                            )
+OPCODE(CompositeInsertU32x4,                                U32x4,          U32x4,          U32,            U32,                                            )
+OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                                            )
+OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                                            )
+OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,                            )
+OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                                            )
+OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                                            )
+OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                                            )
+OPCODE(CompositeInsertF16x2,                                F16x2,          F16x2,          F16,            U32,                                            )
+OPCODE(CompositeInsertF16x3,                                F16x3,          F16x3,          F16,            U32,                                            )
+OPCODE(CompositeInsertF16x4,                                F16x4,          F16x4,          F16,            U32,                                            )
+OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                                            )
+OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                                            )
+OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,                            )
+OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                                            )
+OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                                            )
+OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                                            )
+OPCODE(CompositeInsertF32x2,                                F32x2,          F32x2,          F32,            U32,                                            )
+OPCODE(CompositeInsertF32x3,                                F32x3,          F32x3,          F32,            U32,                                            )
+OPCODE(CompositeInsertF32x4,                                F32x4,          F32x4,          F32,            U32,                                            )
+OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                                            )
+OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                                            )
+OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,                            )
+OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                                            )
+OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                                            )
+OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                                            )
+OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                                            )
+OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                                            )
+OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                                            )
 
 // Select operations
-OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                             )
-OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                             )
-OPCODE(SelectU16,                                           U16,            U1,             U16,            U16,                            )
-OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                            )
-OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                            )
-OPCODE(SelectF16,                                           F16,            U1,             F16,            F16,                            )
-OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                            )
+OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                                             )
+OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                                             )
+OPCODE(SelectU16,                                           U16,            U1,             U16,            U16,                                            )
+OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                                            )
+OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                                            )
+OPCODE(SelectF16,                                           F16,            U1,             F16,            F16,                                            )
+OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                                            )
 
 // Bitwise conversions
-OPCODE(BitCastU16F16,                                       U16,            F16,                                                            )
-OPCODE(BitCastU32F32,                                       U32,            F32,                                                            )
-OPCODE(BitCastU64F64,                                       U64,            F64,                                                            )
-OPCODE(BitCastF16U16,                                       F16,            U16,                                                            )
-OPCODE(BitCastF32U32,                                       F32,            U32,                                                            )
-OPCODE(BitCastF64U64,                                       F64,            U64,                                                            )
-OPCODE(PackUint2x32,                                        U64,            U32x2,                                                          )
-OPCODE(UnpackUint2x32,                                      U32x2,          U64,                                                            )
-OPCODE(PackFloat2x16,                                       U32,            F16x2,                                                          )
-OPCODE(UnpackFloat2x16,                                     F16x2,          U32,                                                            )
-OPCODE(PackHalf2x16,                                        U32,            F32x2,                                                          )
-OPCODE(UnpackHalf2x16,                                      F32x2,          U32,                                                            )
-OPCODE(PackDouble2x32,                                      F64,            U32x2,                                                          )
-OPCODE(UnpackDouble2x32,                                    U32x2,          F64,                                                            )
+OPCODE(BitCastU16F16,                                       U16,            F16,                                                                            )
+OPCODE(BitCastU32F32,                                       U32,            F32,                                                                            )
+OPCODE(BitCastU64F64,                                       U64,            F64,                                                                            )
+OPCODE(BitCastF16U16,                                       F16,            U16,                                                                            )
+OPCODE(BitCastF32U32,                                       F32,            U32,                                                                            )
+OPCODE(BitCastF64U64,                                       F64,            U64,                                                                            )
+OPCODE(PackUint2x32,                                        U64,            U32x2,                                                                          )
+OPCODE(UnpackUint2x32,                                      U32x2,          U64,                                                                            )
+OPCODE(PackFloat2x16,                                       U32,            F16x2,                                                                          )
+OPCODE(UnpackFloat2x16,                                     F16x2,          U32,                                                                            )
+OPCODE(PackHalf2x16,                                        U32,            F32x2,                                                                          )
+OPCODE(UnpackHalf2x16,                                      F32x2,          U32,                                                                            )
+OPCODE(PackDouble2x32,                                      F64,            U32x2,                                                                          )
+OPCODE(UnpackDouble2x32,                                    U32x2,          F64,                                                                            )
 
 // Pseudo-operation, handled specially at final emit
-OPCODE(GetZeroFromOp,                                       U1,             Opaque,                                                         )
-OPCODE(GetSignFromOp,                                       U1,             Opaque,                                                         )
-OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                         )
-OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                         )
+OPCODE(GetZeroFromOp,                                       U1,             Opaque,                                                                         )
+OPCODE(GetSignFromOp,                                       U1,             Opaque,                                                                         )
+OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                                         )
+OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                                         )
+OPCODE(GetSparseFromOp,                                     U1,             Opaque,                                                                         )
 
 // Floating-point operations
-OPCODE(FPAbs16,                                             F16,            F16,                                                            )
-OPCODE(FPAbs32,                                             F32,            F32,                                                            )
-OPCODE(FPAbs64,                                             F64,            F64,                                                            )
-OPCODE(FPAdd16,                                             F16,            F16,            F16,                                            )
-OPCODE(FPAdd32,                                             F32,            F32,            F32,                                            )
-OPCODE(FPAdd64,                                             F64,            F64,            F64,                                            )
-OPCODE(FPFma16,                                             F16,            F16,            F16,            F16,                            )
-OPCODE(FPFma32,                                             F32,            F32,            F32,            F32,                            )
-OPCODE(FPFma64,                                             F64,            F64,            F64,            F64,                            )
-OPCODE(FPMax32,                                             F32,            F32,            F32,                                            )
-OPCODE(FPMax64,                                             F64,            F64,            F64,                                            )
-OPCODE(FPMin32,                                             F32,            F32,            F32,                                            )
-OPCODE(FPMin64,                                             F64,            F64,            F64,                                            )
-OPCODE(FPMul16,                                             F16,            F16,            F16,                                            )
-OPCODE(FPMul32,                                             F32,            F32,            F32,                                            )
-OPCODE(FPMul64,                                             F64,            F64,            F64,                                            )
-OPCODE(FPNeg16,                                             F16,            F16,                                                            )
-OPCODE(FPNeg32,                                             F32,            F32,                                                            )
-OPCODE(FPNeg64,                                             F64,            F64,                                                            )
-OPCODE(FPRecip32,                                           F32,            F32,                                                            )
-OPCODE(FPRecip64,                                           F64,            F64,                                                            )
-OPCODE(FPRecipSqrt32,                                       F32,            F32,                                                            )
-OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                            )
-OPCODE(FPSqrt,                                              F32,            F32,                                                            )
-OPCODE(FPSin,                                               F32,            F32,                                                            )
-OPCODE(FPExp2,                                              F32,            F32,                                                            )
-OPCODE(FPCos,                                               F32,            F32,                                                            )
-OPCODE(FPLog2,                                              F32,            F32,                                                            )
-OPCODE(FPSaturate16,                                        F16,            F16,                                                            )
-OPCODE(FPSaturate32,                                        F32,            F32,                                                            )
-OPCODE(FPSaturate64,                                        F64,            F64,                                                            )
-OPCODE(FPRoundEven16,                                       F16,            F16,                                                            )
-OPCODE(FPRoundEven32,                                       F32,            F32,                                                            )
-OPCODE(FPRoundEven64,                                       F64,            F64,                                                            )
-OPCODE(FPFloor16,                                           F16,            F16,                                                            )
-OPCODE(FPFloor32,                                           F32,            F32,                                                            )
-OPCODE(FPFloor64,                                           F64,            F64,                                                            )
-OPCODE(FPCeil16,                                            F16,            F16,                                                            )
-OPCODE(FPCeil32,                                            F32,            F32,                                                            )
-OPCODE(FPCeil64,                                            F64,            F64,                                                            )
-OPCODE(FPTrunc16,                                           F16,            F16,                                                            )
-OPCODE(FPTrunc32,                                           F32,            F32,                                                            )
-OPCODE(FPTrunc64,                                           F64,            F64,                                                            )
+OPCODE(FPAbs16,                                             F16,            F16,                                                                            )
+OPCODE(FPAbs32,                                             F32,            F32,                                                                            )
+OPCODE(FPAbs64,                                             F64,            F64,                                                                            )
+OPCODE(FPAdd16,                                             F16,            F16,            F16,                                                            )
+OPCODE(FPAdd32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPAdd64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPFma16,                                             F16,            F16,            F16,            F16,                                            )
+OPCODE(FPFma32,                                             F32,            F32,            F32,            F32,                                            )
+OPCODE(FPFma64,                                             F64,            F64,            F64,            F64,                                            )
+OPCODE(FPMax32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPMax64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPMin32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPMin64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPMul16,                                             F16,            F16,            F16,                                                            )
+OPCODE(FPMul32,                                             F32,            F32,            F32,                                                            )
+OPCODE(FPMul64,                                             F64,            F64,            F64,                                                            )
+OPCODE(FPNeg16,                                             F16,            F16,                                                                            )
+OPCODE(FPNeg32,                                             F32,            F32,                                                                            )
+OPCODE(FPNeg64,                                             F64,            F64,                                                                            )
+OPCODE(FPRecip32,                                           F32,            F32,                                                                            )
+OPCODE(FPRecip64,                                           F64,            F64,                                                                            )
+OPCODE(FPRecipSqrt32,                                       F32,            F32,                                                                            )
+OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                                            )
+OPCODE(FPSqrt,                                              F32,            F32,                                                                            )
+OPCODE(FPSin,                                               F32,            F32,                                                                            )
+OPCODE(FPExp2,                                              F32,            F32,                                                                            )
+OPCODE(FPCos,                                               F32,            F32,                                                                            )
+OPCODE(FPLog2,                                              F32,            F32,                                                                            )
+OPCODE(FPSaturate16,                                        F16,            F16,                                                                            )
+OPCODE(FPSaturate32,                                        F32,            F32,                                                                            )
+OPCODE(FPSaturate64,                                        F64,            F64,                                                                            )
+OPCODE(FPRoundEven16,                                       F16,            F16,                                                                            )
+OPCODE(FPRoundEven32,                                       F32,            F32,                                                                            )
+OPCODE(FPRoundEven64,                                       F64,            F64,                                                                            )
+OPCODE(FPFloor16,                                           F16,            F16,                                                                            )
+OPCODE(FPFloor32,                                           F32,            F32,                                                                            )
+OPCODE(FPFloor64,                                           F64,            F64,                                                                            )
+OPCODE(FPCeil16,                                            F16,            F16,                                                                            )
+OPCODE(FPCeil32,                                            F32,            F32,                                                                            )
+OPCODE(FPCeil64,                                            F64,            F64,                                                                            )
+OPCODE(FPTrunc16,                                           F16,            F16,                                                                            )
+OPCODE(FPTrunc32,                                           F32,            F32,                                                                            )
+OPCODE(FPTrunc64,                                           F64,            F64,                                                                            )
 
-OPCODE(FPOrdEqual16,                                        U1,             F16,            F16,                                            )
-OPCODE(FPOrdEqual32,                                        U1,             F32,            F32,                                            )
-OPCODE(FPOrdEqual64,                                        U1,             F64,            F64,                                            )
-OPCODE(FPUnordEqual16,                                      U1,             F16,            F16,                                            )
-OPCODE(FPUnordEqual32,                                      U1,             F32,            F32,                                            )
-OPCODE(FPUnordEqual64,                                      U1,             F64,            F64,                                            )
-OPCODE(FPOrdNotEqual16,                                     U1,             F16,            F16,                                            )
-OPCODE(FPOrdNotEqual32,                                     U1,             F32,            F32,                                            )
-OPCODE(FPOrdNotEqual64,                                     U1,             F64,            F64,                                            )
-OPCODE(FPUnordNotEqual16,                                   U1,             F16,            F16,                                            )
-OPCODE(FPUnordNotEqual32,                                   U1,             F32,            F32,                                            )
-OPCODE(FPUnordNotEqual64,                                   U1,             F64,            F64,                                            )
-OPCODE(FPOrdLessThan16,                                     U1,             F16,            F16,                                            )
-OPCODE(FPOrdLessThan32,                                     U1,             F32,            F32,                                            )
-OPCODE(FPOrdLessThan64,                                     U1,             F64,            F64,                                            )
-OPCODE(FPUnordLessThan16,                                   U1,             F16,            F16,                                            )
-OPCODE(FPUnordLessThan32,                                   U1,             F32,            F32,                                            )
-OPCODE(FPUnordLessThan64,                                   U1,             F64,            F64,                                            )
-OPCODE(FPOrdGreaterThan16,                                  U1,             F16,            F16,                                            )
-OPCODE(FPOrdGreaterThan32,                                  U1,             F32,            F32,                                            )
-OPCODE(FPOrdGreaterThan64,                                  U1,             F64,            F64,                                            )
-OPCODE(FPUnordGreaterThan16,                                U1,             F16,            F16,                                            )
-OPCODE(FPUnordGreaterThan32,                                U1,             F32,            F32,                                            )
-OPCODE(FPUnordGreaterThan64,                                U1,             F64,            F64,                                            )
-OPCODE(FPOrdLessThanEqual16,                                U1,             F16,            F16,                                            )
-OPCODE(FPOrdLessThanEqual32,                                U1,             F32,            F32,                                            )
-OPCODE(FPOrdLessThanEqual64,                                U1,             F64,            F64,                                            )
-OPCODE(FPUnordLessThanEqual16,                              U1,             F16,            F16,                                            )
-OPCODE(FPUnordLessThanEqual32,                              U1,             F32,            F32,                                            )
-OPCODE(FPUnordLessThanEqual64,                              U1,             F64,            F64,                                            )
-OPCODE(FPOrdGreaterThanEqual16,                             U1,             F16,            F16,                                            )
-OPCODE(FPOrdGreaterThanEqual32,                             U1,             F32,            F32,                                            )
-OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,            F64,                                            )
-OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                            )
-OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                            )
-OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                            )
+OPCODE(FPOrdEqual16,                                        U1,             F16,            F16,                                                            )
+OPCODE(FPOrdEqual32,                                        U1,             F32,            F32,                                                            )
+OPCODE(FPOrdEqual64,                                        U1,             F64,            F64,                                                            )
+OPCODE(FPUnordEqual16,                                      U1,             F16,            F16,                                                            )
+OPCODE(FPUnordEqual32,                                      U1,             F32,            F32,                                                            )
+OPCODE(FPUnordEqual64,                                      U1,             F64,            F64,                                                            )
+OPCODE(FPOrdNotEqual16,                                     U1,             F16,            F16,                                                            )
+OPCODE(FPOrdNotEqual32,                                     U1,             F32,            F32,                                                            )
+OPCODE(FPOrdNotEqual64,                                     U1,             F64,            F64,                                                            )
+OPCODE(FPUnordNotEqual16,                                   U1,             F16,            F16,                                                            )
+OPCODE(FPUnordNotEqual32,                                   U1,             F32,            F32,                                                            )
+OPCODE(FPUnordNotEqual64,                                   U1,             F64,            F64,                                                            )
+OPCODE(FPOrdLessThan16,                                     U1,             F16,            F16,                                                            )
+OPCODE(FPOrdLessThan32,                                     U1,             F32,            F32,                                                            )
+OPCODE(FPOrdLessThan64,                                     U1,             F64,            F64,                                                            )
+OPCODE(FPUnordLessThan16,                                   U1,             F16,            F16,                                                            )
+OPCODE(FPUnordLessThan32,                                   U1,             F32,            F32,                                                            )
+OPCODE(FPUnordLessThan64,                                   U1,             F64,            F64,                                                            )
+OPCODE(FPOrdGreaterThan16,                                  U1,             F16,            F16,                                                            )
+OPCODE(FPOrdGreaterThan32,                                  U1,             F32,            F32,                                                            )
+OPCODE(FPOrdGreaterThan64,                                  U1,             F64,            F64,                                                            )
+OPCODE(FPUnordGreaterThan16,                                U1,             F16,            F16,                                                            )
+OPCODE(FPUnordGreaterThan32,                                U1,             F32,            F32,                                                            )
+OPCODE(FPUnordGreaterThan64,                                U1,             F64,            F64,                                                            )
+OPCODE(FPOrdLessThanEqual16,                                U1,             F16,            F16,                                                            )
+OPCODE(FPOrdLessThanEqual32,                                U1,             F32,            F32,                                                            )
+OPCODE(FPOrdLessThanEqual64,                                U1,             F64,            F64,                                                            )
+OPCODE(FPUnordLessThanEqual16,                              U1,             F16,            F16,                                                            )
+OPCODE(FPUnordLessThanEqual32,                              U1,             F32,            F32,                                                            )
+OPCODE(FPUnordLessThanEqual64,                              U1,             F64,            F64,                                                            )
+OPCODE(FPOrdGreaterThanEqual16,                             U1,             F16,            F16,                                                            )
+OPCODE(FPOrdGreaterThanEqual32,                             U1,             F32,            F32,                                                            )
+OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,            F64,                                                            )
+OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                                            )
+OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                                            )
+OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                                            )
 
 // Integer operations
-OPCODE(IAdd32,                                              U32,            U32,            U32,                                            )
-OPCODE(IAdd64,                                              U64,            U64,            U64,                                            )
-OPCODE(ISub32,                                              U32,            U32,            U32,                                            )
-OPCODE(ISub64,                                              U64,            U64,            U64,                                            )
-OPCODE(IMul32,                                              U32,            U32,            U32,                                            )
-OPCODE(INeg32,                                              U32,            U32,                                                            )
-OPCODE(INeg64,                                              U64,            U64,                                                            )
-OPCODE(IAbs32,                                              U32,            U32,                                                            )
-OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                            )
-OPCODE(ShiftLeftLogical64,                                  U64,            U64,            U32,                                            )
-OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                            )
-OPCODE(ShiftRightLogical64,                                 U64,            U64,            U32,                                            )
-OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                            )
-OPCODE(ShiftRightArithmetic64,                              U64,            U64,            U32,                                            )
-OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                            )
-OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                            )
-OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                            )
-OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,            )
-OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                            )
-OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                            )
-OPCODE(BitReverse32,                                        U32,            U32,                                                            )
-OPCODE(BitCount32,                                          U32,            U32,                                                            )
-OPCODE(BitwiseNot32,                                        U32,            U32,                                                            )
+OPCODE(IAdd32,                                              U32,            U32,            U32,                                                            )
+OPCODE(IAdd64,                                              U64,            U64,            U64,                                                            )
+OPCODE(ISub32,                                              U32,            U32,            U32,                                                            )
+OPCODE(ISub64,                                              U64,            U64,            U64,                                                            )
+OPCODE(IMul32,                                              U32,            U32,            U32,                                                            )
+OPCODE(INeg32,                                              U32,            U32,                                                                            )
+OPCODE(INeg64,                                              U64,            U64,                                                                            )
+OPCODE(IAbs32,                                              U32,            U32,                                                                            )
+OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                                            )
+OPCODE(ShiftLeftLogical64,                                  U64,            U64,            U32,                                                            )
+OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                                            )
+OPCODE(ShiftRightLogical64,                                 U64,            U64,            U32,                                                            )
+OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                                            )
+OPCODE(ShiftRightArithmetic64,                              U64,            U64,            U32,                                                            )
+OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                                            )
+OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                                            )
+OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                                            )
+OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,                            )
+OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                                            )
+OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                                            )
+OPCODE(BitReverse32,                                        U32,            U32,                                                                            )
+OPCODE(BitCount32,                                          U32,            U32,                                                                            )
+OPCODE(BitwiseNot32,                                        U32,            U32,                                                                            )
 
-OPCODE(FindSMsb32,                                          U32,            U32,                                                            )
-OPCODE(FindUMsb32,                                          U32,            U32,                                                            )
-OPCODE(SMin32,                                              U32,            U32,            U32,                                            )
-OPCODE(UMin32,                                              U32,            U32,            U32,                                            )
-OPCODE(SMax32,                                              U32,            U32,            U32,                                            )
-OPCODE(UMax32,                                              U32,            U32,            U32,                                            )
-OPCODE(SLessThan,                                           U1,             U32,            U32,                                            )
-OPCODE(ULessThan,                                           U1,             U32,            U32,                                            )
-OPCODE(IEqual,                                              U1,             U32,            U32,                                            )
-OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                            )
-OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                            )
-OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                            )
-OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                            )
-OPCODE(INotEqual,                                           U1,             U32,            U32,                                            )
-OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                            )
-OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                            )
+OPCODE(FindSMsb32,                                          U32,            U32,                                                                            )
+OPCODE(FindUMsb32,                                          U32,            U32,                                                                            )
+OPCODE(SMin32,                                              U32,            U32,            U32,                                                            )
+OPCODE(UMin32,                                              U32,            U32,            U32,                                                            )
+OPCODE(SMax32,                                              U32,            U32,            U32,                                                            )
+OPCODE(UMax32,                                              U32,            U32,            U32,                                                            )
+OPCODE(SLessThan,                                           U1,             U32,            U32,                                                            )
+OPCODE(ULessThan,                                           U1,             U32,            U32,                                                            )
+OPCODE(IEqual,                                              U1,             U32,            U32,                                                            )
+OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                                            )
+OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                                            )
+OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                                            )
+OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                                            )
+OPCODE(INotEqual,                                           U1,             U32,            U32,                                                            )
+OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                                            )
+OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                                            )
 
 // Logical operations
-OPCODE(LogicalOr,                                           U1,             U1,             U1,                                             )
-OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                             )
-OPCODE(LogicalXor,                                          U1,             U1,             U1,                                             )
-OPCODE(LogicalNot,                                          U1,             U1,                                                             )
+OPCODE(LogicalOr,                                           U1,             U1,             U1,                                                             )
+OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                                             )
+OPCODE(LogicalXor,                                          U1,             U1,             U1,                                                             )
+OPCODE(LogicalNot,                                          U1,             U1,                                                                             )
 
 // Conversion operations
-OPCODE(ConvertS16F16,                                       U32,            F16,                                                            )
-OPCODE(ConvertS16F32,                                       U32,            F32,                                                            )
-OPCODE(ConvertS16F64,                                       U32,            F64,                                                            )
-OPCODE(ConvertS32F16,                                       U32,            F16,                                                            )
-OPCODE(ConvertS32F32,                                       U32,            F32,                                                            )
-OPCODE(ConvertS32F64,                                       U32,            F64,                                                            )
-OPCODE(ConvertS64F16,                                       U64,            F16,                                                            )
-OPCODE(ConvertS64F32,                                       U64,            F32,                                                            )
-OPCODE(ConvertS64F64,                                       U64,            F64,                                                            )
-OPCODE(ConvertU16F16,                                       U32,            F16,                                                            )
-OPCODE(ConvertU16F32,                                       U32,            F32,                                                            )
-OPCODE(ConvertU16F64,                                       U32,            F64,                                                            )
-OPCODE(ConvertU32F16,                                       U32,            F16,                                                            )
-OPCODE(ConvertU32F32,                                       U32,            F32,                                                            )
-OPCODE(ConvertU32F64,                                       U32,            F64,                                                            )
-OPCODE(ConvertU64F16,                                       U64,            F16,                                                            )
-OPCODE(ConvertU64F32,                                       U64,            F32,                                                            )
-OPCODE(ConvertU64F64,                                       U64,            F64,                                                            )
-OPCODE(ConvertU64U32,                                       U64,            U32,                                                            )
-OPCODE(ConvertU32U64,                                       U32,            U64,                                                            )
-OPCODE(ConvertF16F32,                                       F16,            F32,                                                            )
-OPCODE(ConvertF32F16,                                       F32,            F16,                                                            )
-OPCODE(ConvertF32F64,                                       F32,            F64,                                                            )
-OPCODE(ConvertF64F32,                                       F64,            F32,                                                            )
+OPCODE(ConvertS16F16,                                       U32,            F16,                                                                            )
+OPCODE(ConvertS16F32,                                       U32,            F32,                                                                            )
+OPCODE(ConvertS16F64,                                       U32,            F64,                                                                            )
+OPCODE(ConvertS32F16,                                       U32,            F16,                                                                            )
+OPCODE(ConvertS32F32,                                       U32,            F32,                                                                            )
+OPCODE(ConvertS32F64,                                       U32,            F64,                                                                            )
+OPCODE(ConvertS64F16,                                       U64,            F16,                                                                            )
+OPCODE(ConvertS64F32,                                       U64,            F32,                                                                            )
+OPCODE(ConvertS64F64,                                       U64,            F64,                                                                            )
+OPCODE(ConvertU16F16,                                       U32,            F16,                                                                            )
+OPCODE(ConvertU16F32,                                       U32,            F32,                                                                            )
+OPCODE(ConvertU16F64,                                       U32,            F64,                                                                            )
+OPCODE(ConvertU32F16,                                       U32,            F16,                                                                            )
+OPCODE(ConvertU32F32,                                       U32,            F32,                                                                            )
+OPCODE(ConvertU32F64,                                       U32,            F64,                                                                            )
+OPCODE(ConvertU64F16,                                       U64,            F16,                                                                            )
+OPCODE(ConvertU64F32,                                       U64,            F32,                                                                            )
+OPCODE(ConvertU64F64,                                       U64,            F64,                                                                            )
+OPCODE(ConvertU64U32,                                       U64,            U32,                                                                            )
+OPCODE(ConvertU32U64,                                       U32,            U64,                                                                            )
+OPCODE(ConvertF16F32,                                       F16,            F32,                                                                            )
+OPCODE(ConvertF32F16,                                       F32,            F16,                                                                            )
+OPCODE(ConvertF32F64,                                       F32,            F64,                                                                            )
+OPCODE(ConvertF64F32,                                       F64,            F32,                                                                            )
+OPCODE(ConvertF16S32,                                       F16,            U32,                                                                            )
+OPCODE(ConvertF16S64,                                       F16,            U64,                                                                            )
+OPCODE(ConvertF16U32,                                       F16,            U32,                                                                            )
+OPCODE(ConvertF16U64,                                       F16,            U64,                                                                            )
+OPCODE(ConvertF32S32,                                       F32,            U32,                                                                            )
+OPCODE(ConvertF32S64,                                       F32,            U64,                                                                            )
+OPCODE(ConvertF32U32,                                       F32,            U32,                                                                            )
+OPCODE(ConvertF32U64,                                       F32,            U64,                                                                            )
+OPCODE(ConvertF64S32,                                       F64,            U32,                                                                            )
+OPCODE(ConvertF64S64,                                       F64,            U64,                                                                            )
+OPCODE(ConvertF64U32,                                       F64,            U32,                                                                            )
+OPCODE(ConvertF64U64,                                       F64,            U64,                                                                            )
+
+// Image operations
+OPCODE(BindlessImageSampleImplicitLod,                      F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(BindlessImageSampleExplicitLod,                      F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(BindlessImageSampleDrefImplicitLod,                  F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+OPCODE(BindlessImageSampleDrefExplicitLod,                  F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+
+OPCODE(BoundImageSampleImplicitLod,                         F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(BoundImageSampleExplicitLod,                         F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(BoundImageSampleDrefImplicitLod,                     F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+OPCODE(BoundImageSampleDrefExplicitLod,                     F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+
+OPCODE(ImageSampleImplicitLod,                              F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(ImageSampleExplicitLod,                              F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
+OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
+OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         )
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
index 771094eb9a..8fea05f7be 100644
--- a/src/shader_recompiler/frontend/ir/reg.h
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256;
     return reg + (-num);
 }
 
+[[nodiscard]] constexpr Reg operator++(Reg& reg) {
+    reg = reg + 1;
+    return reg;
+}
+
+[[nodiscard]] constexpr Reg operator++(Reg& reg, int) {
+    const Reg copy{reg};
+    reg = reg + 1;
+    return copy;
+}
+
 [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
     return static_cast<size_t>(reg);
 }
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 9b7e1480be..3602883d6f 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -75,6 +75,7 @@ private:
         f64 imm_f64;
     };
 };
+static_assert(std::is_trivially_copyable_v<Value>);
 
 template <IR::Type type_>
 class TypedValue : public Value {
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
index 5d0b91598e..f2a2ff331e 100644
--- a/src/shader_recompiler/frontend/maxwell/maxwell.inc
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -249,8 +249,8 @@ INST(SULD,         "SULD",           "1110 1011 000- ----")
 INST(SURED,        "SURED",          "1110 1011 010- ----")
 INST(SUST,         "SUST",           "1110 1011 001- ----")
 INST(SYNC,         "SYNC",           "1111 0000 1111 1---")
-INST(TEX,          "TEX",            "1100 00-- --11 1---")
-INST(TEX_b,        "TEX (b)",        "1101 1110 1011 1---")
+INST(TEX,          "TEX",            "1100 0--- ---- ----")
+INST(TEX_b,        "TEX (b)",        "1101 1110 10-- ----")
 INST(TEXS,         "TEXS",           "1101 -00- ---- ----")
 INST(TLD,          "TLD",            "1101 1100 --11 1---")
 INST(TLD_b,        "TLD (b)",        "1101 1101 --11 1---")
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index dbfc04f75e..b270bbccdb 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
         Optimization::SsaRewritePass(function.post_order_blocks);
     }
     Optimization::GlobalMemoryToStorageBufferPass(program);
+    Optimization::TexturePass(env, program);
     for (IR::Function& function : functions) {
         Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function);
         Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index fc6030e04e..ff429c1263 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -585,14 +585,6 @@ void TranslatorVisitor::SYNC(u64) {
     ThrowNotImplemented(Opcode::SYNC);
 }
 
-void TranslatorVisitor::TEX(u64) {
-    ThrowNotImplemented(Opcode::TEX);
-}
-
-void TranslatorVisitor::TEX_b(u64) {
-    ThrowNotImplemented(Opcode::TEX_b);
-}
-
 void TranslatorVisitor::TEXS(u64) {
     ThrowNotImplemented(Opcode::TEXS);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
new file mode 100644
index 0000000000..98d9f4c648
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
@@ -0,0 +1,232 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Blod : u64 {
+    None,
+    LZ,
+    LB,
+    LL,
+    INVALIDBLOD4,
+    INVALIDBLOD5,
+    LBA,
+    LLA,
+};
+
+enum class TextureType : u64 {
+    _1D,
+    ARRAY_1D,
+    _2D,
+    ARRAY_2D,
+    _3D,
+    ARRAY_3D,
+    CUBE,
+    ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type, bool dc) {
+    switch (type) {
+    case TextureType::_1D:
+        return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D;
+    case TextureType::ARRAY_1D:
+        return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D;
+    case TextureType::_2D:
+        return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D;
+    case TextureType::ARRAY_2D:
+        return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D;
+    case TextureType::_3D:
+        return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D;
+    case TextureType::ARRAY_3D:
+        throw NotImplementedException("3D array texture type");
+    case TextureType::CUBE:
+        return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube;
+    case TextureType::ARRAY_CUBE:
+        return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube;
+    }
+    throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+    const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }};
+    switch (type) {
+    case TextureType::_1D:
+        return v.F(reg);
+    case TextureType::ARRAY_1D:
+        return v.ir.CompositeConstruct(read_array(), v.F(reg + 1));
+    case TextureType::_2D:
+        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+    case TextureType::ARRAY_2D:
+        return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2));
+    case TextureType::_3D:
+        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+    case TextureType::ARRAY_3D:
+        throw NotImplementedException("3D array texture type");
+    case TextureType::CUBE:
+        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+    case TextureType::ARRAY_CUBE:
+        return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
+    }
+    throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
+    switch (blod) {
+    case Blod::None:
+        return v.ir.Imm32(0.0f);
+    case Blod::LZ:
+        return v.ir.Imm32(0.0f);
+    case Blod::LB:
+    case Blod::LL:
+    case Blod::LBA:
+    case Blod::LLA:
+        return v.F(reg++);
+    case Blod::INVALIDBLOD4:
+    case Blod::INVALIDBLOD5:
+        break;
+    }
+    throw NotImplementedException("Invalid blod {}", blod);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+    const IR::U32 value{v.X(reg++)};
+    switch (type) {
+    case TextureType::_1D:
+    case TextureType::ARRAY_1D:
+        return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4));
+    case TextureType::_2D:
+    case TextureType::ARRAY_2D:
+        return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
+                                       v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)));
+    case TextureType::_3D:
+    case TextureType::ARRAY_3D:
+        return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
+                                       v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)),
+                                       v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4)));
+    case TextureType::CUBE:
+    case TextureType::ARRAY_CUBE:
+        throw NotImplementedException("Illegal offset on CUBE sample");
+    }
+    throw NotImplementedException("Invalid texture type {}", type);
+}
+
+bool HasExplicitLod(Blod blod) {
+    switch (blod) {
+    case Blod::LL:
+    case Blod::LLA:
+    case Blod::LZ:
+        return true;
+    default:
+        return false;
+    }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
+          std::optional<u32> cbuf_offset) {
+    union {
+        u64 raw;
+        BitField<35, 1, u64> ndv;
+        BitField<49, 1, u64> nodep;
+        BitField<50, 1, u64> dc;
+        BitField<51, 3, IR::Pred> sparse_pred;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> coord_reg;
+        BitField<20, 8, IR::Reg> meta_reg;
+        BitField<28, 3, TextureType> type;
+        BitField<31, 4, u64> mask;
+    } const tex{insn};
+
+    if (lc) {
+        throw NotImplementedException("LC");
+    }
+    const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
+
+    IR::Reg meta_reg{tex.meta_reg};
+    IR::Value handle;
+    IR::Value offset;
+    IR::F32 dref;
+    IR::F32 lod_clamp;
+    if (cbuf_offset) {
+        handle = v.ir.Imm32(*cbuf_offset);
+    } else {
+        handle = v.X(meta_reg++);
+    }
+    const IR::F32 lod{MakeLod(v, meta_reg, blod)};
+    if (aoffi) {
+        offset = MakeOffset(v, meta_reg, tex.type);
+    }
+    if (tex.dc != 0) {
+        dref = v.F(meta_reg++);
+    }
+    IR::TextureInstInfo info{};
+    info.type.Assign(GetType(tex.type, tex.dc != 0));
+    info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
+    info.has_lod_clamp.Assign(lc ? 1 : 0);
+
+    const IR::Value sample{[&]() -> IR::Value {
+        if (tex.dc == 0) {
+            if (HasExplicitLod(blod)) {
+                return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info);
+            } else {
+                return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
+            }
+        }
+        if (HasExplicitLod(blod)) {
+            return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp,
+                                                   info);
+        } else {
+            return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
+                                                   info);
+        }
+    }()};
+
+    for (int element = 0; element < 4; ++element) {
+        if (((tex.mask >> element) & 1) == 0) {
+            continue;
+        }
+        IR::F32 value;
+        if (tex.dc != 0) {
+            value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
+        } else {
+            value = IR::F32{v.ir.CompositeExtract(sample, element)};
+        }
+        v.F(tex.dest_reg + element, value);
+    }
+    if (tex.sparse_pred != IR::Pred::PT) {
+        v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEX(u64 insn) {
+    union {
+        u64 raw;
+        BitField<54, 1, u64> aoffi;
+        BitField<55, 3, Blod> blod;
+        BitField<58, 1, u64> lc;
+        BitField<36, 13, u64> cbuf_offset;
+    } const tex{insn};
+
+    Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset));
+}
+
+void TranslatorVisitor::TEX_b(u64 insn) {
+    union {
+        u64 raw;
+        BitField<36, 1, u64> aoffi;
+        BitField<37, 3, Blod> blod;
+        BitField<40, 1, u64> lc;
+    } const tex{insn};
+
+    Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
-- 
cgit v1.2.3-70-g09d2