From 980cafdc27444484a2a2794be5de92ea18de6e27 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 3 Mar 2021 00:41:05 -0500
Subject: shader: Implement LOP and LOP3

---
 src/shader_recompiler/frontend/maxwell/maxwell.inc |   4 +-
 .../maxwell/translate/impl/common_funcs.cpp        |  25 ++++-
 .../frontend/maxwell/translate/impl/common_funcs.h |   2 +
 .../frontend/maxwell/translate/impl/impl.h         |   7 ++
 .../maxwell/translate/impl/logic_operation.cpp     |  77 ++++++++++++++
 .../translate/impl/logic_operation_three_input.cpp | 117 +++++++++++++++++++++
 .../maxwell/translate/impl/not_implemented.cpp     |  24 -----
 7 files changed, 225 insertions(+), 31 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp

(limited to 'src/shader_recompiler/frontend')

diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
index 1515285bf8..5d0b91598e 100644
--- a/src/shader_recompiler/frontend/maxwell/maxwell.inc
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -178,8 +178,8 @@ INST(LOP_reg,      "LOP (reg)",      "0101 1100 0100 0---")
 INST(LOP_cbuf,     "LOP (cbuf)",     "0100 1100 0100 0---")
 INST(LOP_imm,      "LOP (imm)",      "0011 100- 0100 0---")
 INST(LOP3_reg,     "LOP3 (reg)",     "0101 1011 1110 0---")
-INST(LOP3_cbuf,    "LOP3 (cbuf)",    "0011 11-- ---- ----")
-INST(LOP3_imm,     "LOP3 (imm)",     "0000 001- ---- ----")
+INST(LOP3_cbuf,    "LOP3 (cbuf)",    "0000 001- ---- ----")
+INST(LOP3_imm,     "LOP3 (imm)",     "0011 11-- ---- ----")
 INST(LOP32I,       "LOP32I",         "0000 01-- ---- ----")
 INST(MEMBAR,       "MEMBAR",         "1110 1111 1001 1---")
 INST(MOV_reg,      "MOV (reg)",      "0101 1100 1001 1---")
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
index 62f825a929..9d4ac2e365 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -5,9 +5,8 @@
 #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
 
 namespace Shader::Maxwell {
-[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
-                                    const IR::U32& operand_2, CompareOp compare_op,
-                                    bool is_signed) {
+IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+                      CompareOp compare_op, bool is_signed) {
     switch (compare_op) {
     case CompareOp::False:
         return ir.Imm1(false);
@@ -30,8 +29,8 @@ namespace Shader::Maxwell {
     }
 }
 
-[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
-                                      const IR::U1& predicate_2, BooleanOp bop) {
+IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
+                        BooleanOp bop) {
     switch (bop) {
     case BooleanOp::AND:
         return ir.LogicalAnd(predicate_1, predicate_2);
@@ -43,4 +42,20 @@ namespace Shader::Maxwell {
         throw NotImplementedException("Invalid bop {}", bop);
     }
 }
+
+IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
+    // False/True yield constants and ignore `result`; Zero/NonZero compare it with 0.
+    if (op == PredicateOp::False || op == PredicateOp::True) {
+        return ir.Imm1(op == PredicateOp::True);
+    }
+    if (op == PredicateOp::Zero) {
+        return ir.IEqual(result, ir.Imm32(0));
+    }
+    if (op == PredicateOp::NonZero) {
+        return ir.INotEqual(result, ir.Imm32(0));
+    }
+    // Reached only for a value outside the four enumerators.
+    throw NotImplementedException("Invalid Predicate operation {}", op);
+}
+
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
index 61e13fa189..c9ae5c5003 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -13,4 +13,6 @@ namespace Shader::Maxwell {
 
 [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
                                       const IR::U1& predicate_2, BooleanOp bop);
+
+[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index ad09ade7c2..c6253c40c1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -28,6 +28,13 @@ enum class BooleanOp : u64 {
     XOR,
 };
 
+enum class PredicateOp : u64 {
+    False = 0,   // PredicateOperation yields constant false
+    True = 1,    // PredicateOperation yields constant true
+    Zero = 2,    // predicate is (result == 0)
+    NonZero = 3, // predicate is (result != 0)
+};
+
 class TranslatorVisitor {
 public:
     explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 0000000000..e786a388e9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,77 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LogicalOp : u64 {
+    AND = 0,    // operand_1 & operand_2
+    OR = 1,     // operand_1 | operand_2
+    XOR = 2,    // operand_1 ^ operand_2
+    PASS_B = 3, // operand_2 returned unchanged
+};
+
+[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
+                                       const IR::U32& operand_2, LogicalOp op) {
+    switch (op) {
+    case LogicalOp::PASS_B:
+        return operand_2; // forwarded as-is, nothing is emitted through ir
+    case LogicalOp::XOR:
+        return ir.BitwiseXor(operand_1, operand_2);
+    case LogicalOp::OR:
+        return ir.BitwiseOr(operand_1, operand_2);
+    case LogicalOp::AND:
+        return ir.BitwiseAnd(operand_1, operand_2);
+    }
+    // Only reachable when op holds a value outside the enumerators above.
+    throw NotImplementedException("Invalid Logical operation {}", op);
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { // shared by LOP_reg/LOP_cbuf/LOP_imm
+    union { // raw instruction word overlaid with the bit fields read below
+        u64 insn;
+        BitField<0, 8, IR::Reg> dest_reg;     // register written with the result
+        BitField<8, 8, IR::Reg> src_reg;      // register read as operand A
+        BitField<39, 1, u64> neg_a;           // nonzero: operand A is bitwise-inverted
+        BitField<40, 1, u64> neg_b;           // nonzero: operand B is bitwise-inverted
+        BitField<41, 2, LogicalOp> bit_op;    // operation passed to LogicalOperation
+        BitField<43, 1, u64> x;               // nonzero: rejected as not implemented
+        BitField<44, 2, PredicateOp> pred_op; // mode passed to PredicateOperation
+        BitField<48, 3, IR::Pred> pred;       // predicate written with that outcome
+    } const lop{insn};
+
+    if (lop.x != 0) {
+        throw NotImplementedException("LOP X");
+    }
+    IR::U32 op_a{v.X(lop.src_reg)};
+    if (lop.neg_a != 0) {
+        op_a = v.ir.BitwiseNot(op_a);
+    }
+    if (lop.neg_b != 0) {
+        op_b = v.ir.BitwiseNot(op_b); // by-value parameter: the caller's value is unaffected
+    }
+
+    const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, lop.bit_op)};
+    const IR::U1 pred_result{PredicateOperation(v.ir, result, lop.pred_op)};
+    v.X(lop.dest_reg, result);
+    v.ir.SetPred(lop.pred, pred_result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP_reg(u64 insn) { // LOP with operand B taken from GetReg20
+    LOP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LOP_cbuf(u64 insn) { // LOP with operand B taken from GetCbuf
+    LOP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LOP_imm(u64 insn) { // LOP with operand B taken from GetImm20
+    LOP(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 0000000000..256c475041
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,117 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// LOP3.LUT reference: https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
+// ORs together one (a, b, c) minterm per set bit of ttbl; bit index is (a << 2) | (b << 1) | c
+IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
+                 u64 ttbl) {
+    IR::U32 acc{ir.Imm32(0)};
+    const IR::U32 inv_a{ir.BitwiseNot(a)};
+    const IR::U32 inv_b{ir.BitwiseNot(b)};
+    const IR::U32 inv_c{ir.BitwiseNot(c)};
+    if ((ttbl & 0x01) != 0) {
+        // minterm a = 0, b = 0, c = 0
+        const auto ab{ir.BitwiseAnd(inv_a, inv_b)};
+        const auto term{ir.BitwiseAnd(ab, inv_c)};
+        acc = ir.BitwiseOr(acc, term);
+    }
+    if ((ttbl & 0x02) != 0) {
+        // minterm a = 0, b = 0, c = 1
+        const auto ab{ir.BitwiseAnd(inv_a, inv_b)};
+        const auto term{ir.BitwiseAnd(ab, c)};
+        acc = ir.BitwiseOr(acc, term);
+    }
+    if ((ttbl & 0x04) != 0) {
+        // minterm a = 0, b = 1, c = 0
+        const auto ab{ir.BitwiseAnd(inv_a, b)};
+        const auto term{ir.BitwiseAnd(ab, inv_c)};
+        acc = ir.BitwiseOr(acc, term);
+    }
+    if ((ttbl & 0x08) != 0) {
+        // minterm a = 0, b = 1, c = 1
+        const auto ab{ir.BitwiseAnd(inv_a, b)};
+        const auto term{ir.BitwiseAnd(ab, c)};
+        acc = ir.BitwiseOr(acc, term);
+    }
+    if ((ttbl & 0x10) != 0) {
+        // minterm a = 1, b = 0, c = 0
+        const auto ab{ir.BitwiseAnd(a, inv_b)};
+        const auto term{ir.BitwiseAnd(ab, inv_c)};
+        acc = ir.BitwiseOr(acc, term);
+    }
+    if ((ttbl & 0x20) != 0) {
+        // minterm a = 1, b = 0, c = 1
+        const auto ab{ir.BitwiseAnd(a, inv_b)};
+        const auto term{ir.BitwiseAnd(ab, c)};
+        acc = ir.BitwiseOr(acc, term);
+    }
+    if ((ttbl & 0x40) != 0) {
+        // minterm a = 1, b = 1, c = 0
+        const auto ab{ir.BitwiseAnd(a, b)};
+        const auto term{ir.BitwiseAnd(ab, inv_c)};
+        acc = ir.BitwiseOr(acc, term);
+    }
+    if ((ttbl & 0x80) != 0) {
+        // minterm a = 1, b = 1, c = 1
+        const auto ab{ir.BitwiseAnd(a, b)};
+        const auto term{ir.BitwiseAnd(ab, c)};
+        acc = ir.BitwiseOr(acc, term);
+    }
+    return acc;
+}
+
+IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg; // register that receives the result
+        BitField<8, 8, IR::Reg> src_reg;  // register supplying operand A
+    } const regs{insn};
+
+    const IR::U32 first_operand{v.X(regs.src_reg)};
+    const IR::U32 lut_result{ApplyLUT(v.ir, first_operand, op_b, op_c, lut)};
+    v.X(regs.dest_reg, lut_result);
+    return lut_result;
+}
+
+u64 GetLut48(u64 insn) {
+    union {
+        u64 raw;
+        BitField<48, 8, u64> table; // truth-table field used by LOP3_cbuf and LOP3_imm
+    } const fields{insn};
+    return fields.table;
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP3_reg(u64 insn) {
+    union {
+        u64 raw;
+        BitField<28, 8, u64> lut;
+        BitField<38, 1, u64> x;
+        BitField<36, 2, PredicateOp> pred_op;
+        BitField<48, 3, IR::Pred> pred;
+    } const fields{insn};
+
+    if (fields.x != 0) {
+        throw NotImplementedException("LOP3 X");
+    }
+    // Unlike LOP3_cbuf/LOP3_imm, the register form also sets a predicate from the result.
+    const IR::U32 lut_result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), fields.lut)};
+    ir.SetPred(fields.pred, PredicateOperation(ir, lut_result, fields.pred_op));
+}
+
+void TranslatorVisitor::LOP3_cbuf(u64 insn) { // operand B via GetCbuf, LUT via GetLut48
+    LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
+}
+
+void TranslatorVisitor::LOP3_imm(u64 insn) { // operand B via GetImm20, LUT via GetLut48
+    LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index c93304a679..a0535f1c2f 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -493,30 +493,6 @@ void TranslatorVisitor::LONGJMP(u64) {
     ThrowNotImplemented(Opcode::LONGJMP);
 }
 
-void TranslatorVisitor::LOP_reg(u64) {
-    ThrowNotImplemented(Opcode::LOP_reg);
-}
-
-void TranslatorVisitor::LOP_cbuf(u64) {
-    ThrowNotImplemented(Opcode::LOP_cbuf);
-}
-
-void TranslatorVisitor::LOP_imm(u64) {
-    ThrowNotImplemented(Opcode::LOP_imm);
-}
-
-void TranslatorVisitor::LOP3_reg(u64) {
-    ThrowNotImplemented(Opcode::LOP3_reg);
-}
-
-void TranslatorVisitor::LOP3_cbuf(u64) {
-    ThrowNotImplemented(Opcode::LOP3_cbuf);
-}
-
-void TranslatorVisitor::LOP3_imm(u64) {
-    ThrowNotImplemented(Opcode::LOP3_imm);
-}
-
 void TranslatorVisitor::LOP32I(u64) {
     ThrowNotImplemented(Opcode::LOP32I);
 }
-- 
cgit v1.2.3-70-g09d2