diff options
Diffstat (limited to 'src/ARMeilleure/Instructions/InstEmitSimdHelper.cs')
-rw-r--r-- | src/ARMeilleure/Instructions/InstEmitSimdHelper.cs | 2088 |
1 files changed, 2088 insertions, 0 deletions
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs new file mode 100644 index 00000000..c44c9b4d --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -0,0 +1,2088 @@ +using ARMeilleure.CodeGen.X86; +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper + { +#region "Masks" + public static readonly long[] EvenMasks = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 // S + }; + + public static readonly long[] OddMasks = new long[] + { + 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B + 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H + 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 // S + }; + + public static readonly long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0; + + public static ulong X86GetGf2p8LogicalShiftLeft(int shift) + { + ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) | + (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8) | (0b10000000UL << 0); + + return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8); + } +#endregion + +#region "X86 SSE Intrinsics" + public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[] + { + Intrinsic.X86Paddb, + Intrinsic.X86Paddw, + Intrinsic.X86Paddd, + Intrinsic.X86Paddq + }; + + public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[] + { + Intrinsic.X86Pcmpeqb, + Intrinsic.X86Pcmpeqw, + Intrinsic.X86Pcmpeqd, + Intrinsic.X86Pcmpeqq + }; + + public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[] + { + Intrinsic.X86Pcmpgtb, + Intrinsic.X86Pcmpgtw, + Intrinsic.X86Pcmpgtd, + Intrinsic.X86Pcmpgtq + }; + + public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[] + { + Intrinsic.X86Pmaxsb, + Intrinsic.X86Pmaxsw, + Intrinsic.X86Pmaxsd + }; + + public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[] + { + Intrinsic.X86Pmaxub, + Intrinsic.X86Pmaxuw, + Intrinsic.X86Pmaxud + }; + + public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[] + { + Intrinsic.X86Pminsb, + Intrinsic.X86Pminsw, + Intrinsic.X86Pminsd + }; + + public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[] + { + Intrinsic.X86Pminub, + Intrinsic.X86Pminuw, + Intrinsic.X86Pminud + }; + + public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[] + { + Intrinsic.X86Pmovsxbw, + Intrinsic.X86Pmovsxwd, + Intrinsic.X86Pmovsxdq + }; + + public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[] + { + Intrinsic.X86Pmovzxbw, + Intrinsic.X86Pmovzxwd, + Intrinsic.X86Pmovzxdq + }; + + public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psllw, + Intrinsic.X86Pslld, + Intrinsic.X86Psllq + }; + + public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psraw, + Intrinsic.X86Psrad + }; + + public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psrlw, + Intrinsic.X86Psrld, + Intrinsic.X86Psrlq + }; + + public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[] + { + Intrinsic.X86Psubb, + Intrinsic.X86Psubw, + Intrinsic.X86Psubd, + Intrinsic.X86Psubq + }; + + public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[] + { + Intrinsic.X86Punpckhbw, + Intrinsic.X86Punpckhwd, + Intrinsic.X86Punpckhdq, + Intrinsic.X86Punpckhqdq + }; + + public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[] + { + Intrinsic.X86Punpcklbw, + Intrinsic.X86Punpcklwd, + Intrinsic.X86Punpckldq, + Intrinsic.X86Punpcklqdq + }; +#endregion + + public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag) + { + if (Optimizations.UseSse2) + { + Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr); + + Operand fzTrue = getFpFlag(FPState.FzFlag); + Operand r0True = getFpFlag(FPState.RMode0Flag); + Operand r1True = getFpFlag(FPState.RMode1Flag); + + mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo))); + + mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0))); + + // X86 round modes in order: nearest, negative, positive, zero + // ARM round modes in order: nearest, positive, negative, zero + // Read the bits backwards to correct this. + + mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0))); + mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0))); + + context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr); + } + else if (Optimizations.UseAdvSimd) + { + Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr); + + Operand fzTrue = getFpFlag(FPState.FzFlag); + Operand r0True = getFpFlag(FPState.RMode0Flag); + Operand r1True = getFpFlag(FPState.RMode1Flag); + + fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1))); + + fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0))); + fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0))); + fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0))); + + context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr); + + // TODO: Restore FPSR + } + } + + public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag) + { + if (Optimizations.UseSse2) + { + Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr); + + // Unset round mode (to nearest) and ftz. + mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo))); + + context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr); + + // Status flags would be stored here if they were used. + } + else if (Optimizations.UseAdvSimd) + { + Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr); + + // Unset round mode (to nearest) and fz. + fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1))); + + context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr); + + // TODO: Store FPSR + } + } + + public static int GetImmShl(OpCodeSimdShImm op) + { + return op.Imm - (8 << op.Size); + } + + public static int GetImmShr(OpCodeSimdShImm op) + { + return (8 << (op.Size + 1)) - op.Imm; + } + + public static Operand X86GetScalar(ArmEmitterContext context, float value) + { + return X86GetScalar(context, BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, double value) + { + return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, int value) + { + return context.VectorCreateScalar(Const(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, long value) + { + return context.VectorCreateScalar(Const(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, float value) + { + return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, double value) + { + return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, short value) + { + ulong value1 = (ushort)value; + ulong value2 = value1 << 16 | value1; + ulong value4 = value2 << 32 | value2; + + return X86GetAllElements(context, (long)value4); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, int value) + { + Operand vector = context.VectorCreateScalar(Const(value)); + + vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0)); + + return vector; + } + + public static Operand X86GetAllElements(ArmEmitterContext context, long value) + { + Operand vector = context.VectorCreateScalar(Const(value)); + + vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector); + + return vector; + } + + public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0) + { + return X86GetElements(context, (ulong)e1, (ulong)e0); + } + + public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0) + { + Operand vector0 = context.VectorCreateScalar(Const(e0)); + Operand vector1 = context.VectorCreateScalar(Const(e1)); + + return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1); + } + + public static int X86GetRoundControl(FPRoundingMode roundMode) + { + switch (roundMode) + { + case FPRoundingMode.ToNearest: return 8 | 0; // even + case FPRoundingMode.TowardsPlusInfinity: return 8 | 2; + case FPRoundingMode.TowardsMinusInfinity: return 8 | 1; + case FPRoundingMode.TowardsZero: return 8 | 3; + } + + throw new ArgumentException($"Invalid rounding mode \"{roundMode}\"."); + } + + public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar) + { + Debug.Assert(n.Type == OperandType.V128); + + Operand nCopy = context.Copy(n); + + Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero)); + + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + if ((op.Size & 1) == 0) + { + Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue); + signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy); + + // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1 + Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF); + valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask); + + nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask); + + nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC); + } + else + { + Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue); + signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy); + + // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L + Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL); + valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask); + + nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask); + + nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC); + } + + return nCopy; + } + + public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.). + { + Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64); + + Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L))); + + Operand c1 = Const(op.Type, 0x33L); + Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1)); + + return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL)); + } + + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + MethodInfo info = (op.Size & 1) == 0 + ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) }) + : typeof(Math). GetMethod(name, new Type[] { typeof(double) }); + + return context.Call(info, n); + } + + public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + string name = nameof(Math.Round); + + MethodInfo info = (op.Size & 1) == 0 + ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) }) + : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) }); + + return context.Call(info, n, Const((int)roundMode)); + } + + public static Operand EmitGetRoundingMode(ArmEmitterContext context) + { + Operand rMode = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1)); + rMode = context.BitwiseOr(rMode, GetFpFlag(FPState.RMode0Flag)); + + return rMode; + } + + public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64); + + Operand lbl1 = Label(); + Operand lbl2 = Label(); + Operand lbl3 = Label(); + Operand lblEnd = Label(); + + Operand rN = Const((int)FPRoundingMode.ToNearest); + Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity); + Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity); + + Operand res = context.AllocateLocal(op.Type); + + Operand rMode = EmitGetRoundingMode(context); + + context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual); + context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual); + context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op)); + context.Branch(lblEnd); + + context.MarkLabel(lbl2); + context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual); + context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op)); + context.Branch(lblEnd); + + context.MarkLabel(lbl3); + context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + MethodInfo info = (op.Size & 1) == 0 + ? typeof(SoftFloat32).GetMethod(name) + : typeof(SoftFloat64).GetMethod(name); + + context.ExitArmFpMode(); + context.StoreToContext(); + Operand res = context.Call(info, callArgs); + context.LoadFromContext(); + context.EnterArmFpMode(); + + return res; + } + + public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand d = context.VectorExtract(type, GetVec(op.Rd), 0); + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0)); + } + + public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size); + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0)); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand a = context.VectorExtract(type, GetVec(op.Ra), 0); + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0)); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + + res = context.VectorInsert(res, emit(ne), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVec(op.Rd), index); + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVec(op.Rd), index); + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, emit(imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: true); + } + + public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: false); + } + + private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed); + Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size); + res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd64) + { + Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]); + Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks [op.Size]); + + Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n + + Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n + Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right)); + } + else if (op.Size < 3) + { + Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]); + + Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n + Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m + + Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM); + Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right)); + } + else + { + Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); + Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right)); + } + } + + public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false); + } + + public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false); + } + + public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true); + } + + public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true); + } + + private static void EmitVectorAcrossVectorOp( + ArmEmitterContext context, + Func2I emit, + bool signed, + bool isLong) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + + Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed); + + for (int index = 1; index < elems; index++) + { + Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + res = emit(res, n); + } + + int size = isLong ? op.Size + 1 : op.Size; + + Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128); + + Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); + + for (int index = 1; index < 4; index++) + { + Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index); + + res = emit(res, n); + } + + Operand d = context.VectorInsert(context.VectorZero(), res, 0); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128); + + const int sm0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0; + const int sm1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0; + const int sm2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0; + const int sm3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0; + + Operand nCopy = context.Copy(GetVec(op.Rn)); + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm0)); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm1)); + Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm2)); + Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm3)); + + Operand res = emit(emit(part0, part1), emit(part2, part3)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + + public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1); + + Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0); + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand op0, op1; + + if ((op.Size & 1) == 0) + { + const int sm0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0; + const int sm1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0; + + Operand zeroN = context.VectorZeroUpper64(n); + + op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm0)); + op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm1)); + } + else /* if ((op.Size & 1) == 1) */ + { + Operand zero = context.VectorZero(); + + op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero); + op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n); + } + + context.Copy(GetVec(op.Rd), emit(op0, op1)); + } + + public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int pairs = op.GetPairsCount() >> sizeF + 2; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex); + Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1); + + Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex); + Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1); + + res = context.VectorInsert(res, emit(n0, n1), index); + res = context.VectorInsert(res, emit(m0, m1), pairs + index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand nCopy = context.Copy(GetVec(op.Rn)); + Operand mCopy = context.Copy(GetVec(op.Rm)); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy); + + Operand zero = context.VectorZero(); + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck); + + context.Copy(GetVec(op.Rd), emit(part0, part1)); + } + else /* if (op.RegisterSize == RegisterSize.Simd128) */ + { + const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0; + const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0; + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm0)); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm1)); + + context.Copy(GetVec(op.Rd), emit(part0, part1)); + } + } + else /* if (sizeF == 1) */ + { + Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy); + + context.Copy(GetVec(op.Rd), emit(part0, part1)); + } + } + + public enum CmpCondition + { + // Legacy Sse. + Equal = 0, // Ordered, non-signaling. + LessThan = 1, // Ordered, signaling. + LessThanOrEqual = 2, // Ordered, signaling. + UnorderedQ = 3, // Non-signaling. + NotLessThan = 5, // Unordered, signaling. + NotLessThanOrEqual = 6, // Unordered, signaling. + OrderedQ = 7, // Non-signaling. + + // Vex. + GreaterThanOrEqual = 13, // Ordered, signaling. + GreaterThan = 14, // Ordered, signaling. + OrderedS = 23 // Signaling. + } + + [Flags] + public enum SaturatingFlags + { + None = 0, + + ByElem = 1 << 0, + Scalar = 1 << 1, + Signed = 1 << 2, + + Add = 1 << 3, + Sub = 1 << 4, + + Accumulate = 1 << 5 + } + + public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed); + } + + public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed); + } + + public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand de; + + if (op.Size <= 2) + { + de = EmitSignedSrcSatQ(context, emit(ne), op.Size, signedDst: true); + } + else /* if (op.Size == 3) */ + { + de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne)); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None) + { + EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed | flags); + } + + public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.Scalar | flags); + } + + public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None) + { + EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | flags); + } + + public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, flags); + } + + public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + EmitSaturatingBinaryOp(context, emit, SaturatingFlags.ByElem | SaturatingFlags.Signed); + } + + public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool byElem = (flags & SaturatingFlags.ByElem) != 0; + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + bool signed = (flags & SaturatingFlags.Signed) != 0; + + bool add = (flags & SaturatingFlags.Add) != 0; + bool sub = (flags & SaturatingFlags.Sub) != 0; + + bool accumulate = (flags & SaturatingFlags.Accumulate) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + if (add || sub) + { + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + Operand me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me); + + de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed); + } + else /* if (op.Size == 3) */ + { + if (add) + { + de = signed ? EmitBinarySignedSatQAdd(context, ne, me) : EmitBinaryUnsignedSatQAdd(context, ne, me); + } + else /* if (sub) */ + { + de = signed ? EmitBinarySignedSatQSub(context, ne, me) : EmitBinaryUnsignedSatQSub(context, ne, me); + } + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else if (accumulate) + { + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed); + Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = context.Add(ne, me); + + de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed); + } + else /* if (op.Size == 3) */ + { + de = signed ? EmitBinarySignedSatQAcc(context, ne, me) : EmitBinaryUnsignedSatQAcc(context, ne, me); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else + { + Operand me = default; + + if (byElem) + { + OpCodeSimdRegElem opRegElem = (OpCodeSimdRegElem)op; + + me = EmitVectorExtract(context, opRegElem.Rm, opRegElem.Index, op.Size, signed); + } + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + if (!byElem) + { + me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed); + } + + Operand de = EmitSignedSrcSatQ(context, emit(ne, me), op.Size, signedDst: signed); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + + context.Copy(GetVec(op.Rd), res); + } + + [Flags] + public enum SaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0; + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + Operand d = GetVec(op.Rd); + + Operand res = part == 0 ? context.VectorZero() : context.Copy(d); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + Operand temp = signedSrc + ? EmitSignedSrcSatQ(context, ne, op.Size, signedDst) + : EmitUnsignedSrcSatQ(context, ne, op.Size, signedDst); + + res = EmitVectorInsert(context, res, temp, part + index, op.Size); + } + + context.Copy(d, res); + } + + // long SignedSignSatQ(long op, int size); + public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size) + { + int eSize = 8 << size; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64); + + Operand lbl1 = Label(); + Operand lblEnd = Label(); + + Operand zeroL = Const(0L); + Operand maxT = Const((1L << (eSize - 1)) - 1L); + Operand minT = Const(-(1L << (eSize - 1))); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL); + + context.BranchIf(lbl1, op, zeroL, Comparison.LessOrEqual); + context.Copy(res, maxT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lblEnd, op, zeroL, Comparison.GreaterOrEqual); + context.Copy(res, minT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // private static ulong UnsignedSignSatQ(ulong op, int size); + public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size) + { + int eSize = 8 << size; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64); + + Operand lblEnd = Label(); + + Operand zeroUL = Const(0UL); + Operand maxT = Const(ulong.MaxValue >> (64 - eSize)); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL); + + context.BranchIf(lblEnd, op, zeroUL, Comparison.LessOrEqualUI); + context.Copy(res, maxT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned). + // long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size); + public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst) + { + int eSizeDst = 8 << sizeDst; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32); + + Operand lbl1 = Label(); + Operand lblEnd = Label(); + + Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL); + Operand minT = signedDst ? Const(-(1L << (eSizeDst - 1))) : Const(0UL); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); + + context.BranchIf(lbl1, op, maxT, Comparison.LessOrEqual); + context.Copy(res, maxT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lblEnd, op, minT, Comparison.GreaterOrEqual); + context.Copy(res, minT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). + // long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size); + public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst) + { + int eSizeDst = 8 << sizeDst; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32); + + Operand lblEnd = Label(); + + Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); + + context.BranchIf(lblEnd, op, maxT, Comparison.LessOrEqualUI); + context.Copy(res, maxT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long UnarySignedSatQAbsOrNeg(long op); + private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand minL = Const(long.MinValue); + Operand maxL = Const(long.MaxValue); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); + + context.BranchIf(lblEnd, op, minL, Comparison.NotEqual); + context.Copy(res, maxL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long BinarySignedSatQAdd(long op1, long op2); + public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand minL = Const(long.MinValue); + Operand maxL = Const(long.MaxValue); + Operand zeroL = Const(0L); + + Operand add = context.Add(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add); + + Operand left = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2)); + Operand right = context.BitwiseExclusiveOr(op1, add); + context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual); + + Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL); + context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL)); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2); + public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand maxUL = Const(ulong.MaxValue); + + Operand add = context.Add(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add); + + context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI); + context.Copy(res, maxUL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long BinarySignedSatQSub(long op1, long op2); + public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand minL = Const(long.MinValue); + Operand maxL = Const(long.MaxValue); + Operand zeroL = Const(0L); + + Operand sub = context.Subtract(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub); + + Operand left = context.BitwiseExclusiveOr(op1, op2); + Operand right = context.BitwiseExclusiveOr(op1, sub); + context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual); + + Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL); + context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL)); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2); + public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand zeroL = Const(0L); + + Operand sub = context.Subtract(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub); + + context.BranchIf(lblEnd, op1, op2, Comparison.GreaterOrEqualUI); + context.Copy(res, zeroL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long BinarySignedSatQAcc(ulong op1, long op2); + private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lbl1 = Label(); + Operand lbl2 = Label(); + Operand lblEnd = Label(); + + Operand maxL = Const(long.MaxValue); + Operand zeroL = Const(0L); + + Operand add = context.Add(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add); + + context.BranchIf(lbl1, op1, maxL, Comparison.GreaterUI); + Operand notOp2AndRes = context.BitwiseAnd(context.BitwiseNot(op2), add); + context.BranchIf(lblEnd, notOp2AndRes, zeroL, Comparison.GreaterOrEqual); + context.Copy(res, maxL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lbl2, op2, zeroL, Comparison.Less); + context.Copy(res, maxL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl2); + context.BranchIf(lblEnd, add, maxL, Comparison.LessOrEqualUI); + context.Copy(res, maxL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // ulong BinaryUnsignedSatQAcc(long op1, ulong op2); + private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lbl1 = Label(); + Operand lblEnd = Label(); + + Operand maxUL = Const(ulong.MaxValue); + Operand maxL = Const(long.MaxValue); + Operand zeroL = Const(0L); + + Operand add = context.Add(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add); + + context.BranchIf(lbl1, op1, zeroL, Comparison.Less); + context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI); + context.Copy(res, maxUL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lblEnd, op2, maxL, Comparison.GreaterUI); + context.BranchIf(lblEnd, add, zeroL, Comparison.GreaterOrEqual); + context.Copy(res, zeroL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector) + { + Operand mask; + if (single) + { + mask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f); + } + else + { + mask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d); + } + + return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value); + } + + public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, true); + } + + public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, false); + } + + public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + Operand res = default; + + switch (size) + { + case 0: + res = context.VectorExtract8(GetVec(reg), index); + break; + + case 1: + res = context.VectorExtract16(GetVec(reg), index); + break; + + case 2: + res = context.VectorExtract(OperandType.I32, GetVec(reg), index); + break; + + case 3: + res = context.VectorExtract(OperandType.I64, GetVec(reg), index); + break; + } + + if (signed) + { + switch (size) + { + case 0: res = context.SignExtend8 (OperandType.I64, res); break; + case 1: res = context.SignExtend16(OperandType.I64, res); break; + case 2: res = context.SignExtend32(OperandType.I64, res); break; + } + } + else + { + switch (size) + { + case 0: res = context.ZeroExtend8 (OperandType.I64, res); break; + case 1: res = context.ZeroExtend16(OperandType.I64, res); break; + case 2: res = context.ZeroExtend32(OperandType.I64, res); break; + } + } + + return res; + } + + public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size) + { + ThrowIfInvalid(index, size); + + if (size < 3 && value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + + switch (size) + { + case 0: vector = context.VectorInsert8 (vector, value, index); break; + case 1: vector = context.VectorInsert16(vector, value, index); break; + case 2: vector = context.VectorInsert (vector, value, index); break; + case 3: vector = context.VectorInsert (vector, value, index); break; + } + + return vector; + } + + public static void ThrowIfInvalid(int index, int size) + { + if ((uint)size > 3u) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((uint)index >= 16u >> size) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + } + } +} |