From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 11 Apr 2021 02:07:02 -0400
Subject: shader: Implement ATOM/S and RED
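
Implements the ATOM, ATOMS and RED instructions for the Maxwell frontend.
New IR opcodes and IREmitter helpers cover 32-bit and 64-bit integer
atomics on shared, global and storage memory, plus F32 add and packed
F16x2 add/min/max variants. The compare-and-swap forms (ATOM_cas and
ATOMS_cas) are left unimplemented.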

---
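Notes: the new IREmitter entry points dispatch on the type of the value
operand, so a single helper covers both the 32-bit and 64-bit opcodes. A
minimal sketch of the call pattern (address and values invented purely
for illustration):

    // Given an IR::IREmitter& ir (e.g. TranslatorVisitor::ir):
    const IR::U64 address{ir.Imm64(u64{0x1000})};
    // A U32 value selects GlobalAtomicIAdd32; the old 32-bit value is returned.
    const IR::U32U64 old32{ir.GlobalAtomicIAdd(address, ir.Imm32(1))};
    // A U64 value selects GlobalAtomicIAdd64 instead.
    const IR::U32U64 old64{ir.GlobalAtomicIAdd(address, ir.Imm64(u64{1}))};

The Storage* opcodes are declared with a (binding, offset) operand pair
but are not emitted by the translators here; they are intended as
replacements for the Global* forms once the storage buffer lowering pass
resolves the 64-bit addresses.
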
 src/shader_recompiler/frontend/ir/ir_emitter.cpp   | 200 ++++++++++++++++++-
 src/shader_recompiler/frontend/ir/ir_emitter.h     |  39 ++++
 .../frontend/ir/microinstruction.cpp               |  66 ++++++
 src/shader_recompiler/frontend/ir/opcodes.inc      |  70 +++++++
 .../impl/atomic_operations_global_memory.cpp       | 222 +++++++++++++++++++++
 .../impl/atomic_operations_shared_memory.cpp       | 110 ++++++++++
 .../maxwell/translate/impl/not_implemented.cpp     |  12 --
 7 files changed, 706 insertions(+), 13 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp

diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 17be0c639e..a3339f624a 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed)
     return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
 }
 
+U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
+    return is_signed ? SharedAtomicSMin(pointer_offset, value)
+                     : SharedAtomicUMin(pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
+    return is_signed ? SharedAtomicSMax(pointer_offset, value)
+                     : SharedAtomicUMax(pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value);
+}
+
+U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
+    return is_signed ? GlobalAtomicSMin(pointer_offset, value)
+                     : GlobalAtomicUMin(pointer_offset, value);
+}
+
+U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
+    return is_signed ? GlobalAtomicSMax(pointer_offset, value)
+                     : GlobalAtomicUMax(pointer_offset, value);
+}
+
+U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value);
+}
+
+U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
+    return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value);
+}
+
+U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
+                                  const FpControl control) {
+    return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
+                                      const FpControl control) {
+    return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
+                                      const FpControl control) {
+    return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
+                                      const FpControl control) {
+    return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value);
+}
+
 U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
     return Inst<U1>(Opcode::LogicalOr, a, b);
 }
@@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst
 }
 
 void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
-                            TextureInstInfo info) {
+                           TextureInstInfo info) {
     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
     Inst(op, Flags{info}, handle, coords, color);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index ec60070efe..f9cbf1304c 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -228,6 +228,45 @@ public:
     [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
     [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
 
+    [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
+    [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
+    [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
+    [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
+
+    [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
+                                          bool is_signed);
+    [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
+                                          bool is_signed);
+    [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
+    [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
+    [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
+    [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
+
+    [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
+                                         const FpControl control = {});
+    [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
+                                             const FpControl control = {});
+    [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
+                                             const FpControl control = {});
+    [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
+                                             const FpControl control = {});
+
     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
     [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 2df631791e..0f66c56270 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::WriteSharedU32:
     case Opcode::WriteSharedU64:
     case Opcode::WriteSharedU128:
+    case Opcode::SharedAtomicIAdd32:
+    case Opcode::SharedAtomicSMin32:
+    case Opcode::SharedAtomicUMin32:
+    case Opcode::SharedAtomicSMax32:
+    case Opcode::SharedAtomicUMax32:
+    case Opcode::SharedAtomicInc32:
+    case Opcode::SharedAtomicDec32:
+    case Opcode::SharedAtomicAnd32:
+    case Opcode::SharedAtomicOr32:
+    case Opcode::SharedAtomicXor32:
+    case Opcode::SharedAtomicExchange32:
+    case Opcode::SharedAtomicExchange64:
+    case Opcode::GlobalAtomicIAdd32:
+    case Opcode::GlobalAtomicSMin32:
+    case Opcode::GlobalAtomicUMin32:
+    case Opcode::GlobalAtomicSMax32:
+    case Opcode::GlobalAtomicUMax32:
+    case Opcode::GlobalAtomicInc32:
+    case Opcode::GlobalAtomicDec32:
+    case Opcode::GlobalAtomicAnd32:
+    case Opcode::GlobalAtomicOr32:
+    case Opcode::GlobalAtomicXor32:
+    case Opcode::GlobalAtomicExchange32:
+    case Opcode::GlobalAtomicIAdd64:
+    case Opcode::GlobalAtomicSMin64:
+    case Opcode::GlobalAtomicUMin64:
+    case Opcode::GlobalAtomicSMax64:
+    case Opcode::GlobalAtomicUMax64:
+    case Opcode::GlobalAtomicAnd64:
+    case Opcode::GlobalAtomicOr64:
+    case Opcode::GlobalAtomicXor64:
+    case Opcode::GlobalAtomicExchange64:
+    case Opcode::GlobalAtomicAddF32:
+    case Opcode::GlobalAtomicAddF16x2:
+    case Opcode::GlobalAtomicAddF32x2:
+    case Opcode::GlobalAtomicMinF16x2:
+    case Opcode::GlobalAtomicMinF32x2:
+    case Opcode::GlobalAtomicMaxF16x2:
+    case Opcode::GlobalAtomicMaxF32x2:
+    case Opcode::StorageAtomicIAdd32:
+    case Opcode::StorageAtomicSMin32:
+    case Opcode::StorageAtomicUMin32:
+    case Opcode::StorageAtomicSMax32:
+    case Opcode::StorageAtomicUMax32:
+    case Opcode::StorageAtomicInc32:
+    case Opcode::StorageAtomicDec32:
+    case Opcode::StorageAtomicAnd32:
+    case Opcode::StorageAtomicOr32:
+    case Opcode::StorageAtomicXor32:
+    case Opcode::StorageAtomicExchange32:
+    case Opcode::StorageAtomicIAdd64:
+    case Opcode::StorageAtomicSMin64:
+    case Opcode::StorageAtomicUMin64:
+    case Opcode::StorageAtomicSMax64:
+    case Opcode::StorageAtomicUMax64:
+    case Opcode::StorageAtomicAnd64:
+    case Opcode::StorageAtomicOr64:
+    case Opcode::StorageAtomicXor64:
+    case Opcode::StorageAtomicExchange64:
+    case Opcode::StorageAtomicAddF32:
+    case Opcode::StorageAtomicAddF16x2:
+    case Opcode::StorageAtomicAddF32x2:
+    case Opcode::StorageAtomicMinF16x2:
+    case Opcode::StorageAtomicMinF32x2:
+    case Opcode::StorageAtomicMaxF16x2:
+    case Opcode::StorageAtomicMaxF32x2:
     case Opcode::BindlessImageWrite:
     case Opcode::BoundImageWrite:
     case Opcode::ImageWrite:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 86ea025605..dc776a73e6 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -321,6 +321,76 @@ OPCODE(INotEqual,                                           U1,             U32,
 OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                                            )
 OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                                            )
 
+// Atomic operations
+OPCODE(SharedAtomicIAdd32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicSMin32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicUMin32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicSMax32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicUMax32,                                  U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicInc32,                                   U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicDec32,                                   U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicAnd32,                                   U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicOr32,                                    U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicXor32,                                   U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicExchange32,                              U32,            U32,            U32,                                                            )
+OPCODE(SharedAtomicExchange64,                              U64,            U32,            U64,                                                            )
+
+OPCODE(GlobalAtomicIAdd32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicSMin32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicUMin32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicSMax32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicUMax32,                                  U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicInc32,                                   U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicDec32,                                   U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicAnd32,                                   U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicOr32,                                    U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicXor32,                                   U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicExchange32,                              U32,            U64,            U32,                                                            )
+OPCODE(GlobalAtomicIAdd64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicSMin64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicUMin64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicSMax64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicUMax64,                                  U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicAnd64,                                   U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicOr64,                                    U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicXor64,                                   U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicExchange64,                              U64,            U64,            U64,                                                            )
+OPCODE(GlobalAtomicAddF32,                                  F32,            U64,            F32,                                                            )
+OPCODE(GlobalAtomicAddF16x2,                                U32,            U64,            F16x2,                                                          )
+OPCODE(GlobalAtomicAddF32x2,                                U32,            U64,            F32x2,                                                          )
+OPCODE(GlobalAtomicMinF16x2,                                U32,            U64,            F16x2,                                                          )
+OPCODE(GlobalAtomicMinF32x2,                                U32,            U64,            F32x2,                                                          )
+OPCODE(GlobalAtomicMaxF16x2,                                U32,            U64,            F16x2,                                                          )
+OPCODE(GlobalAtomicMaxF32x2,                                U32,            U64,            F32x2,                                                          )
+
+OPCODE(StorageAtomicIAdd32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicSMin32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicUMin32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicSMax32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicUMax32,                                 U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicInc32,                                  U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicDec32,                                  U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicAnd32,                                  U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicOr32,                                   U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicXor32,                                  U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicExchange32,                             U32,            U32,            U32,            U32,                                            )
+OPCODE(StorageAtomicIAdd64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicSMin64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicUMin64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicSMax64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicUMax64,                                 U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicAnd64,                                  U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicOr64,                                   U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicXor64,                                  U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicExchange64,                             U64,            U32,            U32,            U64,                                            )
+OPCODE(StorageAtomicAddF32,                                 F32,            U32,            U32,            F32,                                            )
+OPCODE(StorageAtomicAddF16x2,                               U32,            U32,            U32,            F16x2,                                          )
+OPCODE(StorageAtomicAddF32x2,                               U32,            U32,            U32,            F32x2,                                          )
+OPCODE(StorageAtomicMinF16x2,                               U32,            U32,            U32,            F16x2,                                          )
+OPCODE(StorageAtomicMinF32x2,                               U32,            U32,            U32,            F32x2,                                          )
+OPCODE(StorageAtomicMaxF16x2,                               U32,            U32,            U32,            F16x2,                                          )
+OPCODE(StorageAtomicMaxF32x2,                               U32,            U32,            U32,            F32x2,                                          )
+
 // Logical operations
 OPCODE(LogicalOr,                                           U1,             U1,             U1,                                                             )
 OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                                             )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 0000000000..7a32c5eb33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,222 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+    ADD,
+    MIN,
+    MAX,
+    INC,
+    DEC,
+    AND,
+    OR,
+    XOR,
+    EXCH,
+    SAFEADD,
+};
+
+enum class AtomSize : u64 {
+    U32,
+    S32,
+    U64,
+    F32,
+    F16x2,
+    S64,
+};
+
+IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
+                              AtomOp op, bool is_signed) {
+    switch (op) {
+    case AtomOp::ADD:
+        return ir.GlobalAtomicIAdd(offset, op_b);
+    case AtomOp::MIN:
+        return ir.GlobalAtomicIMin(offset, op_b, is_signed);
+    case AtomOp::MAX:
+        return ir.GlobalAtomicIMax(offset, op_b, is_signed);
+    case AtomOp::INC:
+        return ir.GlobalAtomicInc(offset, op_b);
+    case AtomOp::DEC:
+        return ir.GlobalAtomicDec(offset, op_b);
+    case AtomOp::AND:
+        return ir.GlobalAtomicAnd(offset, op_b);
+    case AtomOp::OR:
+        return ir.GlobalAtomicOr(offset, op_b);
+    case AtomOp::XOR:
+        return ir.GlobalAtomicXor(offset, op_b);
+    case AtomOp::EXCH:
+        return ir.GlobalAtomicExchange(offset, op_b);
+    default:
+        throw NotImplementedException("Integer Atom Operation {}", op);
+    }
+}
+
+IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
+                        AtomSize size) {
+    static constexpr IR::FpControl f16_control{
+        .no_contraction{false},
+        .rounding{IR::FpRounding::RN},
+        .fmz_mode{IR::FmzMode::DontCare},
+    };
+    static constexpr IR::FpControl f32_control{
+        .no_contraction{false},
+        .rounding{IR::FpRounding::RN},
+        .fmz_mode{IR::FmzMode::FTZ},
+    };
+    switch (op) {
+    case AtomOp::ADD:
+        return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
+                                     : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
+    case AtomOp::MIN:
+        return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
+    case AtomOp::MAX:
+        return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
+    default:
+        throw NotImplementedException("FP Atom Operation {}", op);
+    }
+}
+
+IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<28, 20, s64> addr_offset;
+        BitField<28, 20, u64> rz_addr_offset;
+        BitField<48, 1, u64> e;
+    } const mem{insn};
+
+    const IR::U64 address{[&]() -> IR::U64 {
+        if (mem.e == 0) {
+            return v.ir.UConvert(64, v.X(mem.addr_reg));
+        }
+        return v.L(mem.addr_reg);
+    }()};
+    const u64 addr_offset{[&]() -> u64 {
+        if (mem.addr_reg == IR::Reg::RZ) {
+            // When RZ is used, the immediate offset is an absolute address
+            return static_cast<u64>(mem.rz_addr_offset.Value());
+        } else {
+            return static_cast<u64>(mem.addr_offset.Value());
+        }
+    }()};
+    return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+
+bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
+    // TODO: SAFEADD
+    switch (size) {
+    case AtomSize::S32:
+    case AtomSize::U64:
+        return (op == AtomOp::INC || op == AtomOp::DEC);
+    case AtomSize::S64:
+        return !(op == AtomOp::MIN || op == AtomOp::MAX);
+    case AtomSize::F32:
+        return op != AtomOp::ADD;
+    case AtomSize::F16x2:
+        return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
+    default:
+        return false;
+    }
+}
+
+IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
+    switch (size) {
+    case AtomSize::U32:
+    case AtomSize::S32:
+    case AtomSize::F32:
+    case AtomSize::F16x2:
+        return ir.LoadGlobal32(offset);
+    case AtomSize::U64:
+    case AtomSize::S64:
+        return ir.PackUint2x32(ir.LoadGlobal64(offset));
+    default:
+        throw NotImplementedException("Atom Size {}", size);
+    }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
+    switch (size) {
+    case AtomSize::U32:
+    case AtomSize::S32:
+    case AtomSize::F16x2:
+        return v.X(dest_reg, IR::U32{result});
+    case AtomSize::U64:
+    case AtomSize::S64:
+        return v.L(dest_reg, IR::U64{result});
+    case AtomSize::F32:
+        return v.F(dest_reg, IR::F32{result});
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOM(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 8, IR::Reg> src_reg_b;
+        BitField<49, 3, AtomSize> size;
+        BitField<52, 4, AtomOp> op;
+    } const atom{insn};
+
+    const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64};
+    const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64};
+    const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2};
+    const IR::U64 offset{AtomOffset(*this, insn)};
+    IR::Value result;
+
+    if (AtomOpNotApplicable(atom.size, atom.op)) {
+        result = LoadGlobal(ir, offset, atom.size);
+    } else if (!is_integer) {
+        if (atom.size == AtomSize::F32) {
+            result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size);
+        } else {
+            const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))};
+            result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size);
+        }
+    } else if (size_64) {
+        result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed);
+    } else {
+        result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed);
+    }
+    StoreResult(*this, atom.dest_reg, result, atom.size);
+}
+
+void TranslatorVisitor::RED(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> src_reg_b;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 3, AtomSize> size;
+        BitField<23, 3, AtomOp> op;
+    } const red{insn};
+
+    if (AtomOpNotApplicable(red.size, red.op)) {
+        return;
+    }
+    const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64};
+    const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64};
+    const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2};
+    const IR::U64 offset{AtomOffset(*this, insn)};
+    if (!is_integer) {
+        if (red.size == AtomSize::F32) {
+            ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size);
+        } else {
+            const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))};
+            ApplyFpAtomOp(ir, offset, src_b, red.op, red.size);
+        }
+    } else if (size_64) {
+        ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed);
+    } else {
+        ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed);
+    }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 0000000000..8b974621e9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+    ADD,
+    MIN,
+    MAX,
+    INC,
+    DEC,
+    AND,
+    OR,
+    XOR,
+    EXCH,
+};
+
+enum class AtomsSize : u64 {
+    U32,
+    S32,
+    U64,
+};
+
+IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
+                        bool is_signed) {
+    switch (op) {
+    case AtomOp::ADD:
+        return ir.SharedAtomicIAdd(offset, op_b);
+    case AtomOp::MIN:
+        return ir.SharedAtomicIMin(offset, op_b, is_signed);
+    case AtomOp::MAX:
+        return ir.SharedAtomicIMax(offset, op_b, is_signed);
+    case AtomOp::INC:
+        return ir.SharedAtomicInc(offset, op_b);
+    case AtomOp::DEC:
+        return ir.SharedAtomicDec(offset, op_b);
+    case AtomOp::AND:
+        return ir.SharedAtomicAnd(offset, op_b);
+    case AtomOp::OR:
+        return ir.SharedAtomicOr(offset, op_b);
+    case AtomOp::XOR:
+        return ir.SharedAtomicXor(offset, op_b);
+    case AtomOp::EXCH:
+        return ir.SharedAtomicExchange(offset, op_b);
+    default:
+        throw NotImplementedException("Integer Atoms Operation {}", op);
+    }
+}
+
+IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> offset_reg;
+        BitField<30, 22, u64> absolute_offset;
+        BitField<30, 22, s64> relative_offset;
+    } const encoding{insn};
+
+    if (encoding.offset_reg == IR::Reg::RZ) {
+        return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
+    } else {
+        const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
+        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+    }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
+    switch (size) {
+    case AtomsSize::U32:
+    case AtomsSize::S32:
+        return v.X(dest_reg, IR::U32{result});
+    case AtomsSize::U64:
+        return v.L(dest_reg, IR::U64{result});
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOMS(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 8, IR::Reg> src_reg_b;
+        BitField<28, 2, AtomsSize> size;
+        BitField<52, 4, AtomOp> op;
+    } const atoms{insn};
+
+    const bool size_64{atoms.size == AtomsSize::U64};
+    if (size_64 && atoms.op != AtomOp::EXCH) {
+        throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
+    }
+    const bool is_signed{atoms.size == AtomsSize::S32};
+    const IR::U32 offset{AtomsOffset(*this, insn)};
+
+    IR::Value result;
+    if (size_64) {
+        result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
+    } else {
+        result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
+    }
+    StoreResult(*this, atoms.dest_reg, result, atoms.size);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 3279412234..aebe3072a5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) {
     ThrowNotImplemented(Opcode::ATOM_cas);
 }
 
-void TranslatorVisitor::ATOM(u64) {
-    ThrowNotImplemented(Opcode::ATOM);
-}
-
 void TranslatorVisitor::ATOMS_cas(u64) {
     ThrowNotImplemented(Opcode::ATOMS_cas);
 }
 
-void TranslatorVisitor::ATOMS(u64) {
-    ThrowNotImplemented(Opcode::ATOMS);
-}
-
 void TranslatorVisitor::B2R(u64) {
     ThrowNotImplemented(Opcode::B2R);
 }
@@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) {
     ThrowNotImplemented(Opcode::RAM);
 }
 
-void TranslatorVisitor::RED(u64) {
-    ThrowNotImplemented(Opcode::RED);
-}
-
 void TranslatorVisitor::RET(u64) {
     ThrowNotImplemented(Opcode::RET);
 }
-- 