aboutsummaryrefslogtreecommitdiff
path: root/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs')
-rw-r--r--src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs12
1 files changed, 10 insertions, 2 deletions
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 543aab02..13d9fac6 100644
--- a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -2426,7 +2426,11 @@ namespace ARMeilleure.Instructions
}
else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
{
- Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
+ // RSQRTSS handles subnormals as zero, which differs from Arm, so we can't use it here.
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Sqrtss, GetVec(op.Rn));
+ res = context.AddIntrinsic(Intrinsic.X86Rcpss, res);
+ res = EmitSse41Round32Exp8OpF(context, res, scalar: true);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
@@ -2451,7 +2455,11 @@ namespace ARMeilleure.Instructions
}
else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
{
- Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
+ // RSQRTPS handles subnormals as zero, which differs from Arm, so we can't use it here.
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Sqrtps, GetVec(op.Rn));
+ res = context.AddIntrinsic(Intrinsic.X86Rcpps, res);
+ res = EmitSse41Round32Exp8OpF(context, res, scalar: false);
if (op.RegisterSize == RegisterSize.Simd64)
{