shader/shift: Implement SHR wrapped and clamped variants

Nvidia defaults to wrapped shifts, but shifting by an out-of-range amount is undefined behaviour in the OpenGL spec. Explicitly mask the shift amount (wrapped variant) or clamp it (clamped variant) according to what the guest shader requires.
author: ReinUsesLisp <reinuseslisp@airmail.cc> 2019-08-31 17:06:00 -0300
committer: ReinUsesLisp <reinuseslisp@airmail.cc> 2019-09-04 01:55:24 -0300
commit: 77ef4fa9078b56c3fcaded3a618cf95fe21e66d4 (patch)
tree: 7d21a7ab8db5c806f93f4d345a887e4f1d2c7a2f /src
parent: 922c7f4e510c12a7e207ba08904c2523d99edd55 (diff)
2 files changed, 17 insertions, 6 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c3678b9eab..bd8c1ada03 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -675,6 +675,10 @@ union Instruction {
     } shift;
 
     union {
+        BitField<39, 1, u64> wrap;
+    } shr;
+
+    union {
         BitField<39, 5, u64> shift_amount;
         BitField<48, 1, u64> negate_b;
         BitField<49, 1, u64> negate_a;
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 2ac16eeb06..f6ee68a54c 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -17,8 +17,8 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    const Node op_a = GetRegister(instr.gpr8);
-    const Node op_b = [&]() {
+    Node op_a = GetRegister(instr.gpr8);
+    Node op_b = [&]() {
         if (instr.is_b_imm) {
             return Immediate(instr.alu.GetSignedImm20_20());
         } else if (instr.is_b_gpr) {
@@ -32,16 +32,23 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
     case OpCode::Id::SHR_C:
     case OpCode::Id::SHR_R:
     case OpCode::Id::SHR_IMM: {
-        const Node value = SignedOperation(OperationCode::IArithmeticShiftRight,
-                                           instr.shift.is_signed, PRECISE, op_a, op_b);
+        if (instr.shr.wrap) {
+            op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f));
+        } else {
+            op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0));
+            op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31));
+        }
+
+        Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
+                                     std::move(op_a), std::move(op_b));
         SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
+        SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
     case OpCode::Id::SHL_C:
     case OpCode::Id::SHL_R:
     case OpCode::Id::SHL_IMM: {
-        const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b);
+        const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
         SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
author	ReinUsesLisp <reinuseslisp@airmail.cc>	2019-08-31 17:06:00 -0300
committer	ReinUsesLisp <reinuseslisp@airmail.cc>	2019-09-04 01:55:24 -0300
commit	77ef4fa9078b56c3fcaded3a618cf95fe21e66d4 (patch)
tree	7d21a7ab8db5c806f93f4d345a887e4f1d2c7a2f /src
parent	922c7f4e510c12a7e207ba08904c2523d99edd55 (diff)