aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure
diff options
context:
space:
mode:
author: gdkchan <gab.dark.100@gmail.com> 2021-01-27 20:23:00 -0300
committer: GitHub <noreply@github.com> 2021-01-28 10:23:00 +1100
commit: dcce4070719a3798bb96d3aa02b9ba02a7fecc16 (patch)
tree: bf33f4c6a79b9261327227965b32685051653124 /ARMeilleure
parent: 98d0240ce66f682e10efc3f826a61fc22b633183 (diff)
Lower precision of estimate instruction results to match Arm behavior (#1943)
* Lower precision of estimate instruction results to match Arm behavior
* PTC version update
* Nits
Diffstat (limited to 'ARMeilleure')
-rw-r--r-- ARMeilleure/Instructions/InstEmitSimdArithmetic.cs 83
-rw-r--r-- ARMeilleure/Translation/PTC/Ptc.cs 2
2 files changed, 66 insertions, 19 deletions
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index f18b91cf..deaa6f5a 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -1475,9 +1475,11 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
- if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
+ if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
{
- EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0);
+ Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
else
{
@@ -1494,9 +1496,16 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
- if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
+ if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
{
- EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0);
+ Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
}
else
{
@@ -1652,7 +1661,7 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.UseSse41)
{
- EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
}
else
{
@@ -1667,7 +1676,7 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.UseSse41)
{
- EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
}
else
{
@@ -1682,7 +1691,7 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.UseSse41)
{
- EmitScalarRoundOpF(context, FPRoundingMode.ToNearest);
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest);
}
else
{
@@ -1697,7 +1706,7 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.UseSse41)
{
- EmitVectorRoundOpF(context, FPRoundingMode.ToNearest);
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest);
}
else
{
@@ -1712,7 +1721,7 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.UseSse41)
{
- EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
}
else
{
@@ -1727,7 +1736,7 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.UseSse41)
{
- EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
}
else
{
@@ -1778,7 +1787,7 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.UseSse41)
{
- EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero);
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero);
}
else
{
@@ -1793,7 +1802,7 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.UseSse41)
{
- EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero);
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero);
}
else
{
@@ -1810,9 +1819,11 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
- if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
+ if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
{
- EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0);
+ Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
else
{
@@ -1829,9 +1840,16 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
- if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
+ if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
{
- EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0);
+ Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
}
else
{
@@ -3498,7 +3516,7 @@ namespace ARMeilleure.Instructions
return context.ConditionalSelect(cmp, op1, op2);
}
- private static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
@@ -3520,7 +3538,7 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
- private static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
@@ -3538,6 +3556,35 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
+ /// <summary>
+ /// Emits code that lowers the precision of FP32 value(s): 0x4000 is added to each 32-bit element and the
+ /// low 15 bits are then cleared (mask 0xFFFF8000), so the bits above bit 14 are rounded half-up.
+ /// Elements whose exponent field (mask 0x7F800000) is all ones, i.e. Inf or NaN, are returned unchanged.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated intrinsics.</param>
+ /// <param name="value">Vector operand holding the FP32 value(s) to round.</param>
+ /// <param name="scalar">
+ /// True to build the constants with X86GetScalar, false to build them with X86GetAllElements
+ /// (presumably element 0 only vs. every element -- confirm against those helpers).
+ /// </param>
+ /// <returns>Operand holding the rounded vector.</returns>
+ private static Operand EmitSse41FP32RoundExp8(ArmEmitterContext context, Operand value, bool scalar)
+ {
+     Operand roundMask;
+     Operand truncMask;
+     Operand expMask;
+
+     if (scalar)
+     {
+         roundMask = X86GetScalar(context, 0x4000);
+         truncMask = X86GetScalar(context, unchecked((int)0xFFFF8000));
+         expMask = X86GetScalar(context, 0x7F800000);
+     }
+     else
+     {
+         roundMask = X86GetAllElements(context, 0x4000);
+         truncMask = X86GetAllElements(context, unchecked((int)0xFFFF8000));
+         expMask = X86GetAllElements(context, 0x7F800000);
+     }
+
+     Operand oValue = value;
+     Operand masked = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask);
+
+     // Compare whole 32-bit elements so every bit of the mask reflects "exponent is all ones".
+     Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqd, masked, expMask);
+
+     // The add has to operate on 32-bit lanes: a 16-bit lane add would drop the carry out of bit 15
+     // whenever the low half is >= 0xC000, rounding the value down past the truncated result.
+     value = context.AddIntrinsic(Intrinsic.X86Paddd, value, roundMask);
+     value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask);
+
+     // Keep the original element wherever the input was Inf/NaN, otherwise take the rounded one.
+     return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf);
+ }
+
public static void EmitSse2VectorIsNaNOpF(
ArmEmitterContext context,
Operand opF,
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index 75a801e5..fd69077b 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -22,7 +22,7 @@ namespace ARMeilleure.Translation.PTC
{
private const string HeaderMagic = "PTChd";
- private const int InternalVersion = 1956; //! To be incremented manually for each change to the ARMeilleure project.
+ private const int InternalVersion = 1943; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";