diff options
author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2022-10-19 02:21:33 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-19 00:21:33 +0000 |
commit | 5af8ce7c38d0b5c910a271ff4a43313850b49a59 (patch) | |
tree | 553058c029d8d85193a05dc3d983192e5d1bc1b2 /ARMeilleure/Instructions/InstEmitSimdArithmetic.cs | |
parent | 77c4291c3482c7adf707d2353128dded5a24bab3 (diff) |
A64: Add fast path for Fcvtas_Gp/S/V, Fcvtau_Gp/S/V and Frinta_S/V in… (#3712) 1.1.314
* A64: Add fast path for Fcvtas_Gp/S/V, Fcvtau_Gp/S/V and Frinta_S/V instructions;
they use the "Round to Nearest with Ties to Away" rounding mode, which is not supported on x86.
All instructions involved have been tested locally in both release and debug modes, in both lowcq and highcq.
The titles Mario Strikers and Super Smash Bros. U. use these instructions intensively.
* Update Ptc.cs
* A32: Add fast path for Vcvta_RM, Vrinta_RM and Vrinta_V instructions as well.
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdArithmetic.cs')
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdArithmetic.cs | 52 |
1 files changed, 42 insertions, 10 deletions
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index a35e28a1..b91c522e 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -1617,18 +1617,32 @@ namespace ARMeilleure.Instructions public static void Frinta_S(ArmEmitterContext context) { - EmitScalarUnaryOpF(context, (op1) => + if (Optimizations.UseSse41) { - return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); - }); + EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } } public static void Frinta_V(ArmEmitterContext context) { - EmitVectorUnaryOpF(context, (op1) => + if (Optimizations.UseSse41) { - return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); - }); + EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } } public static void Frinti_S(ArmEmitterContext context) @@ -3516,9 +3530,18 @@ namespace ARMeilleure.Instructions Operand n = GetVec(op.Rn); - Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss; + Operand res; - Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + if (roundMode != FPRoundingMode.ToNearestAway) + { + Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss; + + res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + } + else + { + res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: true); + } if ((op.Size & 1) != 0) { @@ -3538,9 +3561,18 @@ namespace ARMeilleure.Instructions Operand n = GetVec(op.Rn); - Intrinsic inst = (op.Size & 1) != 0 ? 
Intrinsic.X86Roundpd : Intrinsic.X86Roundps; + Operand res; + + if (roundMode != FPRoundingMode.ToNearestAway) + { + Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps; - Operand res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + } + else + { + res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: false); + } if (op.RegisterSize == RegisterSize.Simd64) { |