author     jduncanator <1518948+jduncanator@users.noreply.github.com>   2020-03-05 11:41:33 +1100
committer  GitHub <noreply@github.com>   2020-03-05 11:41:33 +1100
commit     68e15c1a7471e4b2844fc0d3c7385523e595521d (patch)
tree       3783af4216d1e4b31135d8055ea5bcd44a69276e /ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
parent     d9ed827696700ef5b9b777031bab451f23fb837c (diff)
Implement Fast Paths for most A32 SIMD instructions (#952)
* Begin work on A32 SIMD Intrinsics
* More instructions, some cleanup.
* Intrinsics for Move instructions (zip etc). These pass the existing tests.
* Intrinsics for some of Cvt. While doing this I noticed that the conversion for int/fp was incorrect in the slow path. I'll fix this in the original repo.
* Intrinsics for more Arithmetic instructions.
* Intrinsics for Vext
* Fix VEXT Intrinsic for double words.
* Use InsertPs to move scalar values.
* Cleanup, fix VPADD.f32 and VMIN signed integer.
* Cleanup, add SSE2 support for scalar insert. Works similarly to the IR scalar insert, but obviously this one works directly on V128.
* Minor cleanup.
* Enable intrinsic for FP64 to integer conversion.
* Address feedback apart from splitting out intrinsic float abs. Also: bad VREV encodings as undefined rather than throwing in translation.
* Move float abs to helper, fix bug with cvt
* Rename opc2 & 3 to match A32 docs, use ArgumentOutOfRangeException appropriately.
* Get name of variable at compilation rather than string literal.
* Use correct double sign mask.
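A note on the "Use InsertPs to move scalar values" bullet: INSERTPS is the SSE4.1 instruction for copying one float lane into another without disturbing the rest of the vector. The following standalone sketch (demo code using .NET's hardware intrinsics, not ARMeilleure's IR) shows the instruction's control-byte encoding:

    using System;
    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    static class InsertPsDemo
    {
        static void Main()
        {
            if (!Sse41.IsSupported) return;

            Vector128<float> dest   = Vector128.Create(1f, 2f, 3f, 4f);
            Vector128<float> scalar = Vector128.Create(9f, 0f, 0f, 0f);

            // INSERTPS control byte: bits 7:6 select the source lane,
            // bits 5:4 the destination lane, bits 3:0 zero lanes.
            // 0x20 = lane 0 of 'scalar' into lane 2 of 'dest'.
            Vector128<float> res = Sse41.Insert(dest, scalar, 0x20);

            Console.WriteLine(res); // <1, 2, 9, 4>
        }
    }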
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdArithmetic.cs')
-rw-r--r--   ARMeilleure/Instructions/InstEmitSimdArithmetic.cs   36
1 file changed, 10 insertions(+), 26 deletions(-)
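The recurring pattern this diff removes is the x86 sign-mask absolute-value trick: ANDNPS/ANDNPD compute ~mask & value, and the bit pattern of -0.0 is exactly the sign bit, so the result is |value|. A minimal standalone C# sketch of the bit-level idea (hypothetical demo code, not from this repository):

    using System;

    static class FloatAbsDemo
    {
        // abs(x) = x with the sign bit cleared. This is what ANDNPS does
        // with a -0.0f mask: andn(mask, x) = ~mask & x, and the bits of
        // -0.0f are exactly the sign bit (0x80000000).
        static float AbsViaSignMask(float x)
        {
            int bits = BitConverter.SingleToInt32Bits(x);
            int mask = BitConverter.SingleToInt32Bits(-0.0f); // 0x80000000
            return BitConverter.Int32BitsToSingle(bits & ~mask);
        }

        static void Main()
        {
            Console.WriteLine(AbsViaSignMask(-1.5f)); // 1.5
            Console.WriteLine(AbsViaSignMask(2.5f));  // 2.5
        }
    }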
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 4603ae0b..32e10b0b 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -186,9 +186,7 @@ namespace ARMeilleure.Instructions
{
Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));
- Operand mask = X86GetScalar(context, -0f);
-
- res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+ res = EmitFloatAbs(context, res, true, false);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
@@ -196,9 +194,7 @@ namespace ARMeilleure.Instructions
{
Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));
- Operand mask = X86GetScalar(context, -0d);
-
- res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+ res = EmitFloatAbs(context, res, false, false);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
}
@@ -226,9 +222,7 @@ namespace ARMeilleure.Instructions
{
Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));
- Operand mask = X86GetAllElements(context, -0f);
-
- res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+ res = EmitFloatAbs(context, res, true, true);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -241,9 +235,7 @@ namespace ARMeilleure.Instructions
{
Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));
- Operand mask = X86GetAllElements(context, -0d);
-
- res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+ res = EmitFloatAbs(context, res, false, true);
context.Copy(GetVec(op.Rd), res);
}
@@ -267,17 +259,13 @@ namespace ARMeilleure.Instructions
if (op.Size == 0)
{
- Operand mask = X86GetScalar(context, -0f);
-
- Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, false);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
}
else /* if (op.Size == 1) */
{
- Operand mask = X86GetScalar(context, -0d);
-
- Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, false);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
}
@@ -299,11 +287,9 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
- if (sizeF == 0)
+ if (sizeF == 0)
{
- Operand mask = X86GetAllElements(context, -0f);
-
- Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, true);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -314,9 +300,7 @@ namespace ARMeilleure.Instructions
}
else /* if (sizeF == 1) */
{
- Operand mask = X86GetAllElements(context, -0d);
-
- Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, true);
context.Copy(GetVec(op.Rd), res);
}
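Every hunk above replaces the inline mask-plus-ANDN sequence with a single EmitFloatAbs call. The helper's definition lives outside this file and is not part of this diff; judging from the removed lines and the four call-site argument patterns (single/double, scalar/vector), a plausible reconstruction (an assumption, not the committed code) is:

    public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
    {
        // Build the sign-bit mask the removed code built inline at each site.
        Operand mask = single
            ? (vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f))
            : (vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d));

        // andn(mask, value) = ~mask & value, clearing only the sign bit.
        return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value);
    }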
@@ -3121,7 +3105,7 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
- private static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
+ public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
{
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
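This final hunk only widens EmitSse2VectorIsQNaNOpF from private to public, presumably so the new A32 fast paths can reuse the existing A64 quiet-NaN check rather than duplicating it. For reference, detecting a quiet NaN comes down to a bit test; a standalone scalar C# sketch of the idea (illustrative only, not the emitter's SSE2 code):

    using System;

    static class QNaNDemo
    {
        // IEEE-754 binary32: a quiet NaN has all exponent bits set
        // (0x7F800000) plus the "quiet" bit, the top mantissa bit
        // (0x00400000).
        static bool IsQuietNaN(float x)
        {
            uint bits = (uint)BitConverter.SingleToInt32Bits(x);
            return (bits & 0x7FC00000u) == 0x7FC00000u;
        }

        static void Main()
        {
            Console.WriteLine(IsQuietNaN(float.NaN)); // True (float.NaN is quiet)
            Console.WriteLine(IsQuietNaN(1.0f));      // False
        }
    }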