about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ARMeilleure/Instructions/InstEmitSimdArithmetic.cs 52
-rw-r--r-- ARMeilleure/Instructions/InstEmitSimdCvt.cs 126
-rw-r--r-- ARMeilleure/Instructions/InstEmitSimdCvt32.cs 49
-rw-r--r-- ARMeilleure/Instructions/InstEmitSimdHelper.cs 55
-rw-r--r-- ARMeilleure/State/FPRoundingMode.cs 5
-rw-r--r-- ARMeilleure/Translation/PTC/Ptc.cs 2
6 files changed, 243 insertions, 46 deletions
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index a35e28a1..b91c522e 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -1617,18 +1617,32 @@ namespace ARMeilleure.Instructions
public static void Frinta_S(ArmEmitterContext context)
{
- EmitScalarUnaryOpF(context, (op1) =>
+ if (Optimizations.UseSse41)
{
- return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
- });
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
}
public static void Frinta_V(ArmEmitterContext context)
{
- EmitVectorUnaryOpF(context, (op1) =>
+ if (Optimizations.UseSse41)
{
- return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
- });
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
}
public static void Frinti_S(ArmEmitterContext context)
@@ -3516,9 +3530,18 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
- Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;
+ Operand res;
- Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;
+
+ res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: true);
+ }
if ((op.Size & 1) != 0)
{
@@ -3538,9 +3561,18 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
- Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps;
+ Operand res;
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps;
- Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: false);
+ }
if (op.RegisterSize == RegisterSize.Simd64)
{
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
index c8c427b7..9329f2b7 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -164,32 +164,74 @@ namespace ARMeilleure.Instructions
public static void Fcvtas_Gp(ArmEmitterContext context)
{
- EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ }
}
public static void Fcvtas_S(ArmEmitterContext context)
{
- EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
+ }
}
public static void Fcvtas_V(ArmEmitterContext context)
{
- EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
+ }
}
public static void Fcvtau_Gp(ArmEmitterContext context)
{
- EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ }
}
public static void Fcvtau_S(ArmEmitterContext context)
{
- EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
+ }
}
public static void Fcvtau_V(ArmEmitterContext context)
{
- EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
+ }
}
public static void Fcvtl_V(ArmEmitterContext context)
@@ -1223,7 +1265,14 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
}
- nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
@@ -1265,7 +1314,14 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
}
- nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
@@ -1314,7 +1370,14 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
}
- nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
Operand zero = context.VectorZero();
@@ -1369,7 +1432,14 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
}
- nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
Operand zero = context.VectorZero();
@@ -1424,7 +1494,14 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
}
- nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
@@ -1464,7 +1541,14 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
}
- nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
@@ -1512,7 +1596,14 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
}
- nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
Operand zero = context.VectorZero();
@@ -1567,7 +1658,14 @@ namespace ARMeilleure.Instructions
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
}
- nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
Operand zero = context.VectorZero();
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
index 69ba4274..c76634eb 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -203,6 +203,9 @@ namespace ARMeilleure.Instructions
FPRoundingMode roundMode;
switch (rm)
{
+ case 0b00:
+ roundMode = FPRoundingMode.ToNearestAway;
+ break;
case 0b01:
roundMode = FPRoundingMode.ToNearest;
break;
@@ -228,7 +231,7 @@ namespace ARMeilleure.Instructions
bool unsigned = op.Opc == 0;
int rm = op.Opc2 & 3;
- if (Optimizations.UseSse41 && rm != 0b00)
+ if (Optimizations.UseSse41)
{
EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
}
@@ -267,15 +270,21 @@ namespace ARMeilleure.Instructions
int rm = op.Opc2 & 3;
- if (Optimizations.UseSse2 && rm != 0b00)
+ if (Optimizations.UseSse41)
{
EmitScalarUnaryOpSimd32(context, (m) =>
{
- Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
-
FPRoundingMode roundMode = RMToRoundMode(rm);
- return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
+ }
});
}
else
@@ -305,7 +314,17 @@ namespace ARMeilleure.Instructions
// VRINTA (vector).
public static void Vrinta_V(ArmEmitterContext context)
{
- EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
+ if (Optimizations.UseSse41)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
+ }
}
// VRINTM (vector).
@@ -413,7 +432,14 @@ namespace ARMeilleure.Instructions
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
- nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
Operand zero = context.VectorZero();
@@ -464,7 +490,14 @@ namespace ARMeilleure.Instructions
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
- nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
Operand zero = context.VectorZero();
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
index 49c17560..0e7af794 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -33,6 +33,14 @@ namespace ARMeilleure.Instructions
};
public static readonly long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0;
+
+ public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
+ {
+ ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) |
+ (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8) | (0b10000000UL << 0);
+
+ return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
+ }
#endregion
#region "X86 SSE Intrinsics"
@@ -243,19 +251,44 @@ namespace ARMeilleure.Instructions
throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
}
- public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
+ public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
{
- ulong identity =
- (0b00000001UL << 56) |
- (0b00000010UL << 48) |
- (0b00000100UL << 40) |
- (0b00001000UL << 32) |
- (0b00010000UL << 24) |
- (0b00100000UL << 16) |
- (0b01000000UL << 8) |
- (0b10000000UL << 0);
+ Debug.Assert(n.Type == OperandType.V128);
- return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
+ Operand nCopy = context.Copy(n);
+
+ Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));
+
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
+ signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
+
+ // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
+ Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
+ valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC);
+ }
+ else
+ {
+ Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
+ signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
+
+ // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
+ Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
+ valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC);
+ }
+
+ return nCopy;
}
public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
diff --git a/ARMeilleure/State/FPRoundingMode.cs b/ARMeilleure/State/FPRoundingMode.cs
index ee4f8766..8d757a15 100644
--- a/ARMeilleure/State/FPRoundingMode.cs
+++ b/ARMeilleure/State/FPRoundingMode.cs
@@ -2,9 +2,10 @@ namespace ARMeilleure.State
{
public enum FPRoundingMode
{
- ToNearest = 0,
+ ToNearest = 0, // With ties to even.
TowardsPlusInfinity = 1,
TowardsMinusInfinity = 2,
- TowardsZero = 3
+ TowardsZero = 3,
+ ToNearestAway = 4 // With ties to away.
}
}
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index 1515713b..70f6e012 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
- private const uint InternalVersion = 3710; //! To be incremented manually for each change to the ARMeilleure project.
+ private const uint InternalVersion = 3713; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";