aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure
diff options
context:
space:
mode:
authorLDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>2021-01-19 23:12:33 +0100
committerGitHub <noreply@github.com>2021-01-20 09:12:33 +1100
commitc3e0c41da3cef647b8bea54f77103fbad85098ba (patch)
tree7548024191ab10cfac85844ccb2a77312c6d583a /ARMeilleure
parentb8353f5639cd61cfe33bb3af8f93988f31b3e444 (diff)
CPU (A64): Add Fmaxnmp & Fminnmp Scalar Inst.s, Fast & Slow Paths; with Tests. (#1894)
Diffstat (limited to 'ARMeilleure')
-rw-r--r--ARMeilleure/CodeGen/X86/IntrinsicTable.cs1
-rw-r--r--ARMeilleure/Decoders/OpCodeTable.cs2
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdArithmetic.cs58
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdHelper.cs43
-rw-r--r--ARMeilleure/Instructions/InstName.cs2
-rw-r--r--ARMeilleure/IntermediateRepresentation/Intrinsic.cs1
6 files changed, 93 insertions, 14 deletions
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index 9030be3c..5deee349 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -119,6 +119,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount));
Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary));
Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pshufd, new IntrinsicInfo(X86Instruction.Pshufd, IntrinsicType.BinaryImm));
Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary));
Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary));
Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary));
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index b1912485..928d0e0d 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -311,6 +311,7 @@ namespace ARMeilleure.Decoders
SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, OpCodeSimdReg.Create);
SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, OpCodeSimdReg.Create);
SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, OpCodeSimdReg.Create);
+ SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create);
SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create);
SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create);
SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create);
@@ -319,6 +320,7 @@ namespace ARMeilleure.Decoders
SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, OpCodeSimdReg.Create);
SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, OpCodeSimdReg.Create);
SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, OpCodeSimdReg.Create);
+ SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create);
SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create);
SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create);
SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create);
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 88be07bd..bd6a98be 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -347,19 +347,17 @@ namespace ARMeilleure.Instructions
public static void Faddp_S(ArmEmitterContext context)
{
- OpCodeSimd op = (OpCodeSimd)context.CurrOp;
-
- int sizeF = op.Size & 1;
-
if (Optimizations.FastFP && Optimizations.UseSse3)
{
- if (sizeF == 0)
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn));
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
- else /* if (sizeF == 1) */
+ else /* if ((op.Size & 1) == 1) */
{
Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn));
@@ -368,14 +366,10 @@ namespace ARMeilleure.Instructions
}
else
{
- OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
-
- Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
- Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);
-
- Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), ne0, ne1);
-
- context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
}
}
@@ -552,6 +546,24 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Fmaxnmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
public static void Fmaxnmp_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse41)
@@ -708,6 +720,24 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Fminnmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
public static void Fminnmp_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse41)
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
index eab891ec..e9d5303c 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -1118,6 +1118,49 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
+ public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);
+
+ Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand op0, op1;
+
+ if ((op.Size & 1) == 0)
+ {
+ const int sm0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0;
+ const int sm1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0;
+
+ Operand zeroN = context.VectorZeroUpper64(n);
+
+ op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm0));
+ op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm1));
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand zero = context.VectorZero();
+
+ op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero);
+ op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n);
+ }
+
+ context.Copy(GetVec(op.Rd), emit(op0, op1));
+ }
+
public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index a0ec9dc3..990e4393 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -212,6 +212,7 @@ namespace ARMeilleure.Instructions
Fmax_V,
Fmaxnm_S,
Fmaxnm_V,
+ Fmaxnmp_S,
Fmaxnmp_V,
Fmaxnmv_V,
Fmaxp_V,
@@ -220,6 +221,7 @@ namespace ARMeilleure.Instructions
Fmin_V,
Fminnm_S,
Fminnm_V,
+ Fminnmp_S,
Fminnmp_V,
Fminnmv_V,
Fminp_V,
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
index e2989863..1ddf93e5 100644
--- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
+++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -108,6 +108,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Popcnt,
X86Por,
X86Pshufb,
+ X86Pshufd,
X86Pslld,
X86Pslldq,
X86Psllq,