Diffstat (limited to 'src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs')
-rw-r--r--  src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs | 116
1 file changed, 59 insertions(+), 57 deletions(-)
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 7e7f26b1..7b308fa9 100644
--- a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -7,7 +7,6 @@ using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
-
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
@@ -185,11 +184,12 @@ namespace ARMeilleure.Instructions
int eSize = 8 << op.Size;
- Operand res = eSize switch {
- 8 => Clz_V_I8 (context, GetVec(op.Rn)),
+ Operand res = eSize switch
+ {
+ 8 => Clz_V_I8(context, GetVec(op.Rn)),
16 => Clz_V_I16(context, GetVec(op.Rn)),
32 => Clz_V_I32(context, GetVec(op.Rn)),
- _ => default
+ _ => default,
};
if (res != default)
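
For reference, the three helpers this switch dispatches to all vectorize the same per-element operation; only the element width differs. A scalar model of what each lane computes (loop form for clarity, helper name hypothetical):

    static int Clz(uint value, int eSize) // eSize = 8, 16 or 32
    {
        int count = 0;
        for (int bit = eSize - 1; bit >= 0 && (value & (1u << bit)) == 0; bit--)
        {
            count++; // counts zeros from the top bit down to the first 1
        }
        return count; // eSize when value == 0
    }
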
@@ -230,14 +230,14 @@ namespace ARMeilleure.Instructions
Operand clzTable = X86GetScalar(context, 0x01_01_01_01_02_02_03_04);
Operand maskLow = X86GetAllElements(context, 0x0f_0f_0f_0f);
- Operand c04 = X86GetAllElements(context, 0x04_04_04_04);
+ Operand c04 = X86GetAllElements(context, 0x04_04_04_04);
// CLZ of low 4 bits of elements in arg.
Operand loClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, arg);
// Get the high 4 bits of elements in arg.
Operand hiArg = context.AddIntrinsic(Intrinsic.X86Psrlw, arg, Const(4));
- hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow);
+ hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow);
// CLZ of high 4 bits of elements in arg.
Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, hiArg);
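
The 0x01_01_01_01_02_02_03_04 scalar is a 16-entry PSHUFB table of CLZ values for 4-bit inputs; entries 8..15 come from the implicit zero upper half and are correctly 0, since any nibble with its top bit set has no leading zeros. A minimal scalar sketch of the lookups, assuming the (elided) combine step selects the low-nibble count only when the high nibble is zero:

    static readonly byte[] Clz4Table = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

    static int Clz8(byte x)
    {
        int hiClz = Clz4Table[x >> 4]; // the hiClz lookup above
        return hiClz == 4              // high nibble all zeros?
            ? 4 + Clz4Table[x & 0xf]   // the loClz lookup above
            : hiClz;
    }
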
@@ -257,8 +257,8 @@ namespace ARMeilleure.Instructions
}
Operand maskSwap = X86GetElements(context, 0x80_0f_80_0d_80_0b_80_09, 0x80_07_80_05_80_03_80_01);
- Operand maskLow = X86GetAllElements(context, 0x00ff_00ff);
- Operand c0008 = X86GetAllElements(context, 0x0008_0008);
+ Operand maskLow = X86GetAllElements(context, 0x00ff_00ff);
+ Operand c0008 = X86GetAllElements(context, 0x0008_0008);
// CLZ pair of high 8 and low 8 bits of elements in arg.
Operand hiloClz = Clz_V_I8(context, arg);
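
Here the 16-bit count is derived from the per-byte counts: 0x00ff_00ff masks the low byte's count, 0x0008_0008 is both the threshold for an all-zero high byte and the addend, and maskSwap shuffles each element's high-byte count into place. A scalar sketch of the combine these constants suggest (Clz8 as in the previous sketch):

    static int Clz16(ushort x)
    {
        int hiClz = Clz8((byte)(x >> 8));
        return hiClz == 8 ? 8 + Clz8((byte)x) : hiClz;
    }
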
@@ -282,12 +282,14 @@ namespace ARMeilleure.Instructions
return default;
}
+#pragma warning disable IDE0055 // Disable formatting
Operand AddVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Paddd, op0, op1);
Operand SubVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Psubd, op0, op1);
Operand ShiftRightVectorUI32(Operand op0, int imm8) => context.AddIntrinsic(Intrinsic.X86Psrld, op0, Const(imm8));
Operand OrVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Por, op0, op1);
Operand AndVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Pand, op0, op1);
Operand NotVector(Operand op0) => context.AddIntrinsic(Intrinsic.X86Pandn, op0, context.VectorOne());
+#pragma warning restore IDE0055
Operand c55555555 = X86GetAllElements(context, 0x55555555);
Operand c33333333 = X86GetAllElements(context, 0x33333333);
@@ -311,24 +313,24 @@ namespace ARMeilleure.Instructions
// Count leading 1s, which is the population count.
tmp0 = ShiftRightVectorUI32(res, 1);
tmp0 = AndVector(tmp0, c55555555);
- res = SubVectorI32(res, tmp0);
+ res = SubVectorI32(res, tmp0);
tmp0 = ShiftRightVectorUI32(res, 2);
tmp0 = AndVector(tmp0, c33333333);
tmp1 = AndVector(res, c33333333);
- res = AddVectorI32(tmp0, tmp1);
+ res = AddVectorI32(tmp0, tmp1);
tmp0 = ShiftRightVectorUI32(res, 4);
tmp0 = AddVectorI32(tmp0, res);
- res = AndVector(tmp0, c0f0f0f0f);
+ res = AndVector(tmp0, c0f0f0f0f);
tmp0 = ShiftRightVectorUI32(res, 8);
- res = AddVectorI32(tmp0, res);
+ res = AddVectorI32(tmp0, res);
tmp0 = ShiftRightVectorUI32(res, 16);
- res = AddVectorI32(tmp0, res);
+ res = AddVectorI32(tmp0, res);
- res = AndVector(res, c0000003f);
+ res = AndVector(res, c0000003f);
return res;
}
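
The sequence above is the classic SWAR population count with the usual 0x55/0x33/0x0f masks, applied after the (elided) steps smear the highest set bit downward and invert, so the surviving 1s are exactly the original leading zeros. A scalar model of the whole computation:

    static uint Popcount32(uint x) // mirrors the tmp0/tmp1/res sequence above
    {
        x -= (x >> 1) & 0x55555555;                     // 2-bit partial sums
        x = (x & 0x33333333) + ((x >> 2) & 0x33333333); // 4-bit partial sums
        x = (x + (x >> 4)) & 0x0f0f0f0f;                // 8-bit partial sums
        x += x >> 8;
        x += x >> 16;
        return x & 0x3f;                                // result is 0..32
    }

    static uint Clz32(uint x)
    {
        x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16;
        return Popcount32(~x); // count the leading 1s of the inverse
    }
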
@@ -2436,8 +2438,8 @@ namespace ARMeilleure.Instructions
if (sizeF == 0)
{
- Operand maskHalf = X86GetScalar(context, 0.5f);
- Operand maskThree = X86GetScalar(context, 3f);
+ Operand maskHalf = X86GetScalar(context, 0.5f);
+ Operand maskThree = X86GetScalar(context, 3f);
Operand maskOneHalf = X86GetScalar(context, 1.5f);
if (Optimizations.UseFma)
@@ -2457,8 +2459,8 @@ namespace ARMeilleure.Instructions
}
else /* if (sizeF == 1) */
{
- Operand maskHalf = X86GetScalar(context, 0.5d);
- Operand maskThree = X86GetScalar(context, 3d);
+ Operand maskHalf = X86GetScalar(context, 0.5d);
+ Operand maskThree = X86GetScalar(context, 3d);
Operand maskOneHalf = X86GetScalar(context, 1.5d);
if (Optimizations.UseFma)
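
Both the float and double paths here evaluate the FRSQRTS step, one term of a Newton-Raphson iteration for 1/sqrt; maskHalf and maskThree are its constants, and the FMA branch merely fuses the multiply and subtract. A scalar model of the step (the 1.5 in maskOneHalf is, as the architecture defines it, the fixed result for the zero-times-infinity special case handled in the select helper):

    static double RSqrtStep(double n, double m)
    {
        return (3.0 - n * m) * 0.5; // maskThree and maskHalf above
    }
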
@@ -2505,8 +2507,8 @@ namespace ARMeilleure.Instructions
if (sizeF == 0)
{
- Operand maskHalf = X86GetAllElements(context, 0.5f);
- Operand maskThree = X86GetAllElements(context, 3f);
+ Operand maskHalf = X86GetAllElements(context, 0.5f);
+ Operand maskThree = X86GetAllElements(context, 3f);
Operand maskOneHalf = X86GetAllElements(context, 1.5f);
if (Optimizations.UseFma)
@@ -2519,7 +2521,7 @@ namespace ARMeilleure.Instructions
res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
}
- res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
if (op.RegisterSize == RegisterSize.Simd64)
@@ -2531,8 +2533,8 @@ namespace ARMeilleure.Instructions
}
else /* if (sizeF == 1) */
{
- Operand maskHalf = X86GetAllElements(context, 0.5d);
- Operand maskThree = X86GetAllElements(context, 3d);
+ Operand maskHalf = X86GetAllElements(context, 0.5d);
+ Operand maskThree = X86GetAllElements(context, 3d);
Operand maskOneHalf = X86GetAllElements(context, 1.5d);
if (Optimizations.UseFma)
@@ -2545,7 +2547,7 @@ namespace ARMeilleure.Instructions
res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
}
- res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
context.Copy(GetVec(op.Rd), res);
@@ -2824,10 +2826,10 @@ namespace ARMeilleure.Instructions
for (int i = 0; i < 8; i++)
{
Operand mask = context.AddIntrinsic(Intrinsic.X86Psllw, n, Const(15 - i));
- mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15));
+ mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15));
Operand tmp = context.AddIntrinsic(Intrinsic.X86Psllw, m, Const(i));
- tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
+ tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp);
}
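
This loop is a carry-less (polynomial) multiply: each set bit i of n XORs a copy of m shifted left by i into the result, with the Psllw/Psraw pair broadcasting bit i of each lane into a full-width mask. A scalar model for one 8-bit lane:

    static ushort PolyMul8(byte n, byte m)
    {
        ushort res = 0;
        for (int i = 0; i < 8; i++)
        {
            if ((n & (1 << i)) != 0)
            {
                res ^= (ushort)(m << i); // XOR, not add: no carries
            }
        }
        return res;
    }
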
@@ -2839,12 +2841,12 @@ namespace ARMeilleure.Instructions
for (int i = 0; i < 64; i++)
{
Operand mask = context.AddIntrinsic(Intrinsic.X86Movlhps, n, n);
- mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i));
- mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63));
- mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask);
+ mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i));
+ mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63));
+ mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask);
Operand tmp = EmitSse2Sll_128(context, m, i);
- tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
+ tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp);
}
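
The 64-bit variant follows the same shift-and-XOR pattern but accumulates a 128-bit product, with EmitSse2Sll_128 providing the wide shift and the Psubq-from-zero turning the isolated bit into an all-ones mask. A scalar model with the product split into two halves:

    static (ulong lo, ulong hi) PolyMul64(ulong n, ulong m)
    {
        ulong lo = 0, hi = 0;
        for (int i = 0; i < 64; i++)
        {
            if ((n & (1UL << i)) != 0)
            {
                lo ^= m << i;
                hi ^= i == 0 ? 0 : m >> (64 - i); // bits carried past bit 63
            }
        }
        return (lo, hi);
    }
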
@@ -3119,7 +3121,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
- Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad;
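
This is the overflow-free halving add: n & m collects the bits both inputs share (each worth a full carry), n ^ m the bits they do not (each worth half), so their sum equals (n + m) >> 1 without ever forming the wide intermediate. The arithmetic shift serves the signed form; the unsigned variant further down swaps in a logical shift. A scalar model:

    static int HalvingAdd(int n, int m)
    {
        return (n & m) + ((n ^ m) >> 1); // == (n + m) >> 1, no overflow
    }
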
@@ -4058,7 +4060,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
- Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld;
@@ -4594,7 +4596,7 @@ namespace ARMeilleure.Instructions
{
int pairIndex = index << 1;
- Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
+ Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
Operand e = context.Add(ne0, ne1);
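
A scalar model of the pairwise add being emitted: adjacent source elements collapse into single destination elements, halving the element count:

    static long[] AddPairs(long[] src)
    {
        var dst = new long[src.Length / 2];
        for (int index = 0; index < dst.Length; index++)
        {
            int pairIndex = index << 1;
            dst[index] = src[pairIndex] + src[pairIndex + 1];
        }
        return dst;
    }
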
@@ -4686,7 +4688,7 @@ namespace ARMeilleure.Instructions
Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
Operand cmp = signed
- ? context.ICompareGreaterOrEqual (op1, op2)
+ ? context.ICompareGreaterOrEqual(op1, op2)
: context.ICompareGreaterOrEqualUI(op1, op2);
return context.ConditionalSelect(cmp, op1, op2);
@@ -4697,7 +4699,7 @@ namespace ARMeilleure.Instructions
Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
Operand cmp = signed
- ? context.ICompareLessOrEqual (op1, op2)
+ ? context.ICompareLessOrEqual(op1, op2)
: context.ICompareLessOrEqualUI(op1, op2);
return context.ConditionalSelect(cmp, op1, op2);
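
Both helpers reduce to the same branch-free shape: one signed-or-unsigned compare feeding a conditional select. A scalar sketch covering max (flip the comparison for min):

    static long Max64(long op1, long op2, bool signed)
    {
        bool ge = signed ? op1 >= op2 : (ulong)op1 >= (ulong)op2;
        return ge ? op1 : op2;
    }
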
@@ -4852,10 +4854,10 @@ namespace ARMeilleure.Instructions
Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmpps, opF, opF, Const((int)CmpCondition.UnorderedQ));
- Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
- mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
+ mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));
- qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default;
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default;
sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : default;
}
else /* if ((op.Size & 1) == 1) */
@@ -4866,10 +4868,10 @@ namespace ARMeilleure.Instructions
Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmppd, opF, opF, Const((int)CmpCondition.UnorderedQ));
- Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
- mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal));
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
+ mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal));
- qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default;
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default;
sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : default;
}
}
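
mask1 (the unordered self-compare) flags NaN lanes, and mask2 tests the quiet bit, the top mantissa bit: bit 22 for single precision, bit 51 for double. Their AND marks quiet NaNs, their ANDN signaling ones. A scalar model of the classification:

    static bool IsQuietNaN(float x) =>
        float.IsNaN(x) && (BitConverter.SingleToInt32Bits(x) & (1 << 22)) != 0;

    static bool IsSignalingNaN(float x) =>
        float.IsNaN(x) && (BitConverter.SingleToInt32Bits(x) & (1 << 22)) == 0;
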
@@ -4895,11 +4897,11 @@ namespace ARMeilleure.Instructions
Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit);
- Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
- resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask);
- resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
@@ -4929,11 +4931,11 @@ namespace ARMeilleure.Instructions
Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit);
- Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
- resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask);
- resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
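
The mask algebra in both blocks encodes the architectural NaN priority: a signaling NaN beats a quiet one, n beats m on ties, and the winner is quieted by OR-ing in qMask. A scalar model (IsSignalingNaN as in the earlier sketch):

    static float PropagateNaN(float n, float m)
    {
        float chosen = IsSignalingNaN(n) ? n
                     : IsSignalingNaN(m) ? m
                     : float.IsNaN(n) ? n : m;
        int bits = BitConverter.SingleToInt32Bits(chosen) | (1 << 22); // quiet it
        return BitConverter.Int32BitsToSingle(bits);
    }
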
@@ -4964,10 +4966,10 @@ namespace ARMeilleure.Instructions
Operand mask = X86GetAllElements(context, -0f);
Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+ res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
- resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign);
return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
}
@@ -4976,10 +4978,10 @@ namespace ARMeilleure.Instructions
Operand mask = X86GetAllElements(context, -0d);
Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+ res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
- resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign);
return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
}
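
maxps/minps do not produce the required result for +0/-0 ties, so both blocks rebuild the answer from a magnitude (Andnps/Andnpd against the -0 mask strips the sign) and a separately computed sign: AND of the input signs for max (negative only if both are), OR for min (negative if either is). A scalar model, leaving NaNs to the dedicated path above:

    static float MaxMinSse(float n, float m, bool isMax)
    {
        int signBit = unchecked((int)0x80000000);
        int nBits = BitConverter.SingleToInt32Bits(n);
        int mBits = BitConverter.SingleToInt32Bits(m);
        float ord = isMax ? MathF.Max(n, m) : MathF.Min(n, m);
        int mag = BitConverter.SingleToInt32Bits(ord) & ~signBit;     // Andnps
        int sign = (isMax ? nBits & mBits : nBits | mBits) & signBit; // Pand / Por
        return BitConverter.Int32BitsToSingle(mag | sign);            // Por
    }
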
@@ -5003,7 +5005,7 @@ namespace ARMeilleure.Instructions
if (sizeF == 0)
{
Operand negInfMask = scalar
- ? X86GetScalar (context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity)
+ ? X86GetScalar(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity)
: X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
@@ -5038,7 +5040,7 @@ namespace ARMeilleure.Instructions
else /* if (sizeF == 1) */
{
Operand negInfMask = scalar
- ? X86GetScalar (context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity)
+ ? X86GetScalar(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity)
: X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
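
FMaxNum/FMinNum treat a lone quiet NaN as "missing": the nMask/mMask pairs replace a quiet-NaN operand with the value that loses every comparison (negative infinity for max, positive for min), and the regular FMax/FMin path then runs on the substituted inputs. A rough scalar model of the pre-pass (IsQuietNaN as in the earlier sketch):

    static (float n, float m) MaxMinNumPrePass(float n, float m, bool isMaxNum)
    {
        float sub = isMaxNum ? float.NegativeInfinity : float.PositiveInfinity;
        bool nQNaN = IsQuietNaN(n);
        bool mQNaN = IsQuietNaN(m);
        if (nQNaN && !mQNaN) n = sub;
        if (mQNaN && !nQNaN) m = sub;
        return (n, m); // then fall through to the ordinary max/min
    }
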
@@ -5072,7 +5074,7 @@ namespace ARMeilleure.Instructions
{
None,
Add,
- Subtract
+ Subtract,
}
private static void EmitSse41VectorMul_AddSub(ArmEmitterContext context, AddSub addSub)
@@ -5187,10 +5189,10 @@ namespace ARMeilleure.Instructions
Intrinsic subInst = X86PsubInstruction[size];
- Operand res = context.AddIntrinsic(subInst, n, m);
+ Operand res = context.AddIntrinsic(subInst, n, m);
Operand res2 = context.AddIntrinsic(subInst, m, n);
- res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);
res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
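
The Pand/Pandn/Por triple is the standard SSE blend idiom: with cmpMask holding all-ones or all-zeros per element, it selects n - m where the compare succeeded and m - n elsewhere. A scalar model of the select:

    static int MaskedSelect(int cmpMask, int a, int b) // cmpMask is 0 or -1
    {
        return (cmpMask & a) | (~cmpMask & b); // Pand, Pandn, Por
    }
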
@@ -5214,7 +5216,7 @@ namespace ARMeilleure.Instructions
}
Operand high = context.AddIntrinsic(Intrinsic.X86Pslldq, op, Const(8));
- high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift));
+ high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift));
Operand low = context.AddIntrinsic(Intrinsic.X86Psllq, op, Const(shift));
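
EmitSse2Sll_128's carry handling: Pslldq moves the low qword into the high lane, Psrlq extracts the bits that a left shift pushes across the 64-bit boundary, and Psllq shifts both halves; OR-ing the pieces yields the 128-bit shift. A scalar model for 0 < shift < 64:

    static (ulong lo, ulong hi) Sll128(ulong lo, ulong hi, int shift)
    {
        ulong carry = lo >> (64 - shift); // the Pslldq + Psrlq pair above
        return (lo << shift, (hi << shift) | carry);
    }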