From 543d75a587ee2197b83762dba393c1d525c601fc Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Tue, 9 Apr 2024 14:34:14 -0300
Subject: CPU: Produce non-inf results for RSQRTE instruction with subnormal
 inputs (#6634)

* CPU: Produce non-inf results for RSQRTE instruction with subnormal inputs

* PPTC version bump
---
 src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs | 12 ++++++++++--
 src/ARMeilleure/Translation/PTC/Ptc.cs                 |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 543aab02..13d9fac6 100644
--- a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -2426,7 +2426,11 @@ namespace ARMeilleure.Instructions
             }
             else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
-                Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
+                // RSQRTSS treats subnormal inputs as zero and returns infinity, which differs from Arm, so use SQRTSS followed by RCPSS instead.
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Sqrtss, GetVec(op.Rn));
+                res = context.AddIntrinsic(Intrinsic.X86Rcpss, res);
+                res = EmitSse41Round32Exp8OpF(context, res, scalar: true);
 
                 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
             }
@@ -2451,7 +2455,11 @@ namespace ARMeilleure.Instructions
             }
             else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
-                Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
+                // RSQRTPS treats subnormal inputs as zero and returns infinity, which differs from Arm, so use SQRTPS followed by RCPPS instead.
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Sqrtps, GetVec(op.Rn));
+                res = context.AddIntrinsic(Intrinsic.X86Rcpps, res);
+                res = EmitSse41Round32Exp8OpF(context, res, scalar: false);
 
                 if (op.RegisterSize == RegisterSize.Simd64)
                 {
diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs
index f987284f..58f06534 100644
--- a/src/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/src/ARMeilleure/Translation/PTC/Ptc.cs
@@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 6613; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 6634; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";
-- 
cgit v1.2.3-70-g09d2