diff options
Diffstat (limited to 'src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs')
-rw-r--r-- | src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs | 116 |
1 files changed, 59 insertions, 57 deletions
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index 7e7f26b1..7b308fa9 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -7,7 +7,6 @@ using ARMeilleure.State; using ARMeilleure.Translation; using System; using System.Diagnostics; - using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper32; @@ -185,11 +184,12 @@ namespace ARMeilleure.Instructions int eSize = 8 << op.Size; - Operand res = eSize switch { - 8 => Clz_V_I8 (context, GetVec(op.Rn)), + Operand res = eSize switch + { + 8 => Clz_V_I8(context, GetVec(op.Rn)), 16 => Clz_V_I16(context, GetVec(op.Rn)), 32 => Clz_V_I32(context, GetVec(op.Rn)), - _ => default + _ => default, }; if (res != default) @@ -230,14 +230,14 @@ namespace ARMeilleure.Instructions Operand clzTable = X86GetScalar(context, 0x01_01_01_01_02_02_03_04); Operand maskLow = X86GetAllElements(context, 0x0f_0f_0f_0f); - Operand c04 = X86GetAllElements(context, 0x04_04_04_04); + Operand c04 = X86GetAllElements(context, 0x04_04_04_04); // CLZ of low 4 bits of elements in arg. Operand loClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, arg); // Get the high 4 bits of elements in arg. Operand hiArg = context.AddIntrinsic(Intrinsic.X86Psrlw, arg, Const(4)); - hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow); + hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow); // CLZ of high 4 bits of elements in arg. Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, hiArg); @@ -257,8 +257,8 @@ namespace ARMeilleure.Instructions } Operand maskSwap = X86GetElements(context, 0x80_0f_80_0d_80_0b_80_09, 0x80_07_80_05_80_03_80_01); - Operand maskLow = X86GetAllElements(context, 0x00ff_00ff); - Operand c0008 = X86GetAllElements(context, 0x0008_0008); + Operand maskLow = X86GetAllElements(context, 0x00ff_00ff); + Operand c0008 = X86GetAllElements(context, 0x0008_0008); // CLZ pair of high 8 and low 8 bits of elements in arg. Operand hiloClz = Clz_V_I8(context, arg); @@ -282,12 +282,14 @@ namespace ARMeilleure.Instructions return default; } +#pragma warning disable IDE0055 // Disable formatting Operand AddVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Paddd, op0, op1); Operand SubVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Psubd, op0, op1); Operand ShiftRightVectorUI32(Operand op0, int imm8) => context.AddIntrinsic(Intrinsic.X86Psrld, op0, Const(imm8)); Operand OrVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Por, op0, op1); Operand AndVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Pand, op0, op1); Operand NotVector(Operand op0) => context.AddIntrinsic(Intrinsic.X86Pandn, op0, context.VectorOne()); +#pragma warning restore IDE0055 Operand c55555555 = X86GetAllElements(context, 0x55555555); Operand c33333333 = X86GetAllElements(context, 0x33333333); @@ -311,24 +313,24 @@ namespace ARMeilleure.Instructions // Count leading 1s, which is the population count. tmp0 = ShiftRightVectorUI32(res, 1); tmp0 = AndVector(tmp0, c55555555); - res = SubVectorI32(res, tmp0); + res = SubVectorI32(res, tmp0); tmp0 = ShiftRightVectorUI32(res, 2); tmp0 = AndVector(tmp0, c33333333); tmp1 = AndVector(res, c33333333); - res = AddVectorI32(tmp0, tmp1); + res = AddVectorI32(tmp0, tmp1); tmp0 = ShiftRightVectorUI32(res, 4); tmp0 = AddVectorI32(tmp0, res); - res = AndVector(tmp0, c0f0f0f0f); + res = AndVector(tmp0, c0f0f0f0f); tmp0 = ShiftRightVectorUI32(res, 8); - res = AddVectorI32(tmp0, res); + res = AddVectorI32(tmp0, res); tmp0 = ShiftRightVectorUI32(res, 16); - res = AddVectorI32(tmp0, res); + res = AddVectorI32(tmp0, res); - res = AndVector(res, c0000003f); + res = AndVector(res, c0000003f); return res; } @@ -2436,8 +2438,8 @@ namespace ARMeilleure.Instructions if (sizeF == 0) { - Operand maskHalf = X86GetScalar(context, 0.5f); - Operand maskThree = X86GetScalar(context, 3f); + Operand maskHalf = X86GetScalar(context, 0.5f); + Operand maskThree = X86GetScalar(context, 3f); Operand maskOneHalf = X86GetScalar(context, 1.5f); if (Optimizations.UseFma) @@ -2457,8 +2459,8 @@ namespace ARMeilleure.Instructions } else /* if (sizeF == 1) */ { - Operand maskHalf = X86GetScalar(context, 0.5d); - Operand maskThree = X86GetScalar(context, 3d); + Operand maskHalf = X86GetScalar(context, 0.5d); + Operand maskThree = X86GetScalar(context, 3d); Operand maskOneHalf = X86GetScalar(context, 1.5d); if (Optimizations.UseFma) @@ -2505,8 +2507,8 @@ namespace ARMeilleure.Instructions if (sizeF == 0) { - Operand maskHalf = X86GetAllElements(context, 0.5f); - Operand maskThree = X86GetAllElements(context, 3f); + Operand maskHalf = X86GetAllElements(context, 0.5f); + Operand maskThree = X86GetAllElements(context, 3f); Operand maskOneHalf = X86GetAllElements(context, 1.5f); if (Optimizations.UseFma) @@ -2519,7 +2521,7 @@ namespace ARMeilleure.Instructions res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res); } - res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res); + res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res); res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF); if (op.RegisterSize == RegisterSize.Simd64) @@ -2531,8 +2533,8 @@ namespace ARMeilleure.Instructions } else /* if (sizeF == 1) */ { - Operand maskHalf = X86GetAllElements(context, 0.5d); - Operand maskThree = X86GetAllElements(context, 3d); + Operand maskHalf = X86GetAllElements(context, 0.5d); + Operand maskThree = X86GetAllElements(context, 3d); Operand maskOneHalf = X86GetAllElements(context, 1.5d); if (Optimizations.UseFma) @@ -2545,7 +2547,7 @@ namespace ARMeilleure.Instructions res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res); } - res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res); + res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res); res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF); context.Copy(GetVec(op.Rd), res); @@ -2824,10 +2826,10 @@ namespace ARMeilleure.Instructions for (int i = 0; i < 8; i++) { Operand mask = context.AddIntrinsic(Intrinsic.X86Psllw, n, Const(15 - i)); - mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15)); + mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15)); Operand tmp = context.AddIntrinsic(Intrinsic.X86Psllw, m, Const(i)); - tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask); + tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask); res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp); } @@ -2839,12 +2841,12 @@ namespace ARMeilleure.Instructions for (int i = 0; i < 64; i++) { Operand mask = context.AddIntrinsic(Intrinsic.X86Movlhps, n, n); - mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i)); - mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63)); - mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask); + mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i)); + mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63)); + mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask); Operand tmp = EmitSse2Sll_128(context, m, i); - tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask); + tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask); res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp); } @@ -3119,7 +3121,7 @@ namespace ARMeilleure.Instructions Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); - Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad; @@ -4058,7 +4060,7 @@ namespace ARMeilleure.Instructions Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); - Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld; @@ -4594,7 +4596,7 @@ namespace ARMeilleure.Instructions { int pairIndex = index << 1; - Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); Operand e = context.Add(ne0, ne1); @@ -4686,7 +4688,7 @@ namespace ARMeilleure.Instructions Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); Operand cmp = signed - ? context.ICompareGreaterOrEqual (op1, op2) + ? context.ICompareGreaterOrEqual(op1, op2) : context.ICompareGreaterOrEqualUI(op1, op2); return context.ConditionalSelect(cmp, op1, op2); @@ -4697,7 +4699,7 @@ namespace ARMeilleure.Instructions Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); Operand cmp = signed - ? context.ICompareLessOrEqual (op1, op2) + ? context.ICompareLessOrEqual(op1, op2) : context.ICompareLessOrEqualUI(op1, op2); return context.ConditionalSelect(cmp, op1, op2); @@ -4852,10 +4854,10 @@ namespace ARMeilleure.Instructions Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmpps, opF, opF, Const((int)CmpCondition.UnorderedQ)); - Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); - mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal)); + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); + mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal)); - qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default; + qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default; sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : default; } else /* if ((op.Size & 1) == 1) */ @@ -4866,10 +4868,10 @@ namespace ARMeilleure.Instructions Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmppd, opF, opF, Const((int)CmpCondition.UnorderedQ)); - Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); - mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal)); + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); + mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal)); - qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default; + qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default; sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : default; } } @@ -4895,11 +4897,11 @@ namespace ARMeilleure.Instructions Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit); - Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); - resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); + Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); + resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask); - resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); + resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ)); @@ -4929,11 +4931,11 @@ namespace ARMeilleure.Instructions Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit); - Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); - resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); + Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); + resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask); - resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); + resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ)); @@ -4964,10 +4966,10 @@ namespace ARMeilleure.Instructions Operand mask = X86GetAllElements(context, -0f); Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m); - res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res); + res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res); Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m); - resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign); + resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign); return context.AddIntrinsic(Intrinsic.X86Por, res, resSign); } @@ -4976,10 +4978,10 @@ namespace ARMeilleure.Instructions Operand mask = X86GetAllElements(context, -0d); Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m); - res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res); + res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res); Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m); - resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign); + resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign); return context.AddIntrinsic(Intrinsic.X86Por, res, resSign); } @@ -5003,7 +5005,7 @@ namespace ARMeilleure.Instructions if (sizeF == 0) { Operand negInfMask = scalar - ? X86GetScalar (context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity) + ? X86GetScalar(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity) : X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity); Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask); @@ -5038,7 +5040,7 @@ namespace ARMeilleure.Instructions else /* if (sizeF == 1) */ { Operand negInfMask = scalar - ? X86GetScalar (context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity) + ? X86GetScalar(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity) : X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity); Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask); @@ -5072,7 +5074,7 @@ namespace ARMeilleure.Instructions { None, Add, - Subtract + Subtract, } private static void EmitSse41VectorMul_AddSub(ArmEmitterContext context, AddSub addSub) @@ -5187,10 +5189,10 @@ namespace ARMeilleure.Instructions Intrinsic subInst = X86PsubInstruction[size]; - Operand res = context.AddIntrinsic(subInst, n, m); + Operand res = context.AddIntrinsic(subInst, n, m); Operand res2 = context.AddIntrinsic(subInst, m, n); - res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); + res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); @@ -5214,7 +5216,7 @@ namespace ARMeilleure.Instructions } Operand high = context.AddIntrinsic(Intrinsic.X86Pslldq, op, Const(8)); - high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift)); + high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift)); Operand low = context.AddIntrinsic(Intrinsic.X86Psllq, op, Const(shift)); |