author     jduncanator <1518948+jduncanator@users.noreply.github.com>    2020-03-05 11:41:33 +1100
committer  GitHub <noreply@github.com>    2020-03-05 11:41:33 +1100
commit     68e15c1a7471e4b2844fc0d3c7385523e595521d (patch)
tree       3783af4216d1e4b31135d8055ea5bcd44a69276e /ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
parent     d9ed827696700ef5b9b777031bab451f23fb837c (diff)
Implement Fast Paths for most A32 SIMD instructions (#952)
* Begin work on A32 SIMD Intrinsics
* More instructions, some cleanup.
* Intrinsics for Move instructions (zip, etc.)
These pass the existing tests.
* Intrinsics for some of Cvt
While doing this I noticed that the conversion for int/fp was incorrect
in the slow path. I'll fix this in the original repo.
* Intrinsics for more Arithmetic instructions.
* Intrinsics for Vext
* Fix VEXT Intrinsic for double words.
* Use InsertPs to move scalar values (see the scalar-move sketch after this list).
* Cleanup, fix VPADD.f32 and VMIN signed integer.
* Cleanup, add SSE2 support for scalar insert.
Works like the IR scalar insert, but this one operates directly on V128.
* Minor cleanup.
* Enable intrinsic for FP64 to integer conversion.
* Address feedback, apart from splitting out the intrinsic float abs.
Also: treat bad VREV encodings as undefined rather than throwing during translation.
* Move float abs to a helper; fix a bug with cvt.
* Rename opc2 & 3 to match A32 docs, use ArgumentOutOfRangeException appropriately.
* Get the name of the variable at compile time rather than using a string literal (see the nameof sketch after this list).
* Use correct double sign mask.
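
Two of the items above (InsertPs, SSE2 scalar insert) are about splicing a scalar into one lane of a vector. As a point of reference, here is a minimal sketch of the underlying x86 semantics using the .NET hardware-intrinsics API; it illustrates the instructions involved, not the ARMeilleure IR the commit actually emits, and the class and method names are made up for the example:

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static class ScalarMoveSketch
{
    // movss: the result takes element 0 from 'scalar' and elements 1-3 from 'vec'.
    public static Vector128<float> MoveIntoLane0(Vector128<float> vec, Vector128<float> scalar)
        => Sse.MoveScalar(vec, scalar);

    // insertps (SSE4.1): the immediate picks a source element and a destination
    // lane; 0x10 copies source element 0 into destination lane 1, with no zeroing.
    public static Vector128<float> InsertIntoLane1(Vector128<float> vec, Vector128<float> scalar)
        => Sse41.Insert(vec, scalar, 0x10);
}

The appeal of insertps is that one instruction both selects the source element and targets the destination lane, where plain SSE2 needs a shuffle/move combination to reach any lane other than 0.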
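The last two items pair up: passing a parameter's name to ArgumentOutOfRangeException is exactly where a compile-time name beats a string literal. A minimal sketch, assuming C#'s nameof operator is what the item refers to (the actual call site is not in this file's diff, and the range check below is invented for illustration):

using System;

static class NameOfSketch
{
    public static void CheckOpc2(int opc2)
    {
        // Hypothetical range check; 'opc2' echoes the A32 field named above.
        if (opc2 < 0 || opc2 > 3)
        {
            // nameof(opc2) becomes the string "opc2" at compile time, so the
            // exception message stays correct if the parameter is renamed.
            throw new ArgumentOutOfRangeException(nameof(opc2));
        }
    }
}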
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdArithmetic.cs')
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdArithmetic.cs | 36
1 file changed, 10 insertions(+), 26 deletions(-)
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 4603ae0b..32e10b0b 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -186,9 +186,7 @@ namespace ARMeilleure.Instructions
             {
                 Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));
 
-                Operand mask = X86GetScalar(context, -0f);
-
-                res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+                res = EmitFloatAbs(context, res, true, false);
 
                 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
             }
@@ -196,9 +194,7 @@ namespace ARMeilleure.Instructions
             {
                 Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));
 
-                Operand mask = X86GetScalar(context, -0d);
-
-                res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+                res = EmitFloatAbs(context, res, false, false);
 
                 context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
             }
@@ -226,9 +222,7 @@ namespace ARMeilleure.Instructions
             {
                 Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));
 
-                Operand mask = X86GetAllElements(context, -0f);
-
-                res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+                res = EmitFloatAbs(context, res, true, true);
 
                 if (op.RegisterSize == RegisterSize.Simd64)
                 {
@@ -241,9 +235,7 @@ namespace ARMeilleure.Instructions
             {
                 Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));
 
-                Operand mask = X86GetAllElements(context, -0d);
-
-                res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+                res = EmitFloatAbs(context, res, false, true);
 
                 context.Copy(GetVec(op.Rd), res);
             }
@@ -267,17 +259,13 @@ namespace ARMeilleure.Instructions
 
                 if (op.Size == 0)
                 {
-                    Operand mask = X86GetScalar(context, -0f);
-
-                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));
+                    Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, false);
 
                     context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
                 }
                 else /* if (op.Size == 1) */
                 {
-                    Operand mask = X86GetScalar(context, -0d);
-
-                    Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));
+                    Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, false);
 
                     context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
                 }
@@ -299,11 +287,9 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (sizeF == 0)
+            if (sizeF == 0)
             {
-                Operand mask = X86GetAllElements(context, -0f);
-
-                Operand res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, GetVec(op.Rn));
+                Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, true);
 
                 if (op.RegisterSize == RegisterSize.Simd64)
                 {
@@ -314,9 +300,7 @@ namespace ARMeilleure.Instructions
             }
             else /* if (sizeF == 1) */
             {
-                Operand mask = X86GetAllElements(context, -0d);
-
-                Operand res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, GetVec(op.Rn));
+                Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, true);
 
                 context.Copy(GetVec(op.Rd), res);
             }
@@ -3121,7 +3105,7 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVec(op.Rd), res);
         }
 
-        private static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
+        public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
         {
             IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
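
Every hunk above replaces an inline mask-and-andn sequence with a call to EmitFloatAbs. The helper's body lives elsewhere in the commit, so what follows is only a sketch reconstructed from the removed lines and the four call-site shapes; the parameter names, and the reading of the two booleans as "single precision" and "full vector", are inferred rather than confirmed:

private static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
{
    // -0f and -0d are exactly the sign-bit masks: 0x80000000 for float and
    // 0x8000000000000000 for double (the "correct double sign mask" above).
    Operand mask = single
        ? (vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f))
        : (vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d));

    // andnps/andnpd compute ~mask & value, clearing the sign bit to give |x|.
    return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value);
}

Centralizing the pattern lets the Fabd_* and Fabs_* emitters collapse to one-liners, which is what the diffstat's net deletion count (10 insertions against 26 deletions) reflects.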