aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
diff options
context:
space:
mode:
author: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2022-10-19 02:21:33 +0200
committer: GitHub <noreply@github.com> 2022-10-19 00:21:33 +0000
commit: 5af8ce7c38d0b5c910a271ff4a43313850b49a59 (patch)
tree: 553058c029d8d85193a05dc3d983192e5d1bc1b2 /ARMeilleure/Instructions/InstEmitSimdCvt32.cs
parent: 77c4291c3482c7adf707d2353128dded5a24bab3 (diff)
A64: Add fast path for Fcvtas_Gp/S/V, Fcvtau_Gp/S/V and Frinta_S/V instructions (#3712) 1.1.314
* A64: Add fast path for Fcvtas_Gp/S/V, Fcvtau_Gp/S/V and Frinta_S/V instructions; they use the "Round to Nearest with Ties to Away" rounding mode, which is not supported on x86. All instructions involved have been tested locally in both release and debug modes, in both lowcq and highcq. The titles Mario Strikers and Super Smash Bros. U. use these instructions intensively. * Update Ptc.cs * A32: Add fast path for Vcvta_RM, Vrinta_RM and Vrinta_V instructions as well.
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdCvt32.cs')
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdCvt32.cs49
1 files changed, 41 insertions, 8 deletions
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
index 69ba4274..c76634eb 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -203,6 +203,9 @@ namespace ARMeilleure.Instructions
FPRoundingMode roundMode;
switch (rm)
{
+ case 0b00:
+ roundMode = FPRoundingMode.ToNearestAway;
+ break;
case 0b01:
roundMode = FPRoundingMode.ToNearest;
break;
@@ -228,7 +231,7 @@ namespace ARMeilleure.Instructions
bool unsigned = op.Opc == 0;
int rm = op.Opc2 & 3;
- if (Optimizations.UseSse41 && rm != 0b00)
+ if (Optimizations.UseSse41)
{
EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
}
@@ -267,15 +270,21 @@ namespace ARMeilleure.Instructions
int rm = op.Opc2 & 3;
- if (Optimizations.UseSse2 && rm != 0b00)
+ if (Optimizations.UseSse41)
{
EmitScalarUnaryOpSimd32(context, (m) =>
{
- Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
-
FPRoundingMode roundMode = RMToRoundMode(rm);
- return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
+ }
});
}
else
@@ -305,7 +314,17 @@ namespace ARMeilleure.Instructions
// VRINTA (vector).
public static void Vrinta_V(ArmEmitterContext context)
{
- EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
+ if (Optimizations.UseSse41)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
+ }
}
// VRINTM (vector).
@@ -413,7 +432,14 @@ namespace ARMeilleure.Instructions
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
- nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
Operand zero = context.VectorZero();
@@ -464,7 +490,14 @@ namespace ARMeilleure.Instructions
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
- nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
Operand zero = context.VectorZero();