From 5e0f8e873857ce3ca3f532aff0936beb28e412c8 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Tue, 10 Jan 2023 19:16:59 -0300
Subject: Implement JIT Arm64 backend (#4114)

* Implement JIT Arm64 backend
* PPTC version bump
* Address some feedback from Arm64 JIT PR
* Address even more PR feedback
* Remove unused IsPageAligned function
* Sync Qc flag before calls
* Fix comment and remove unused enum
* Address riperiperi PR feedback
* Delete Breakpoint IR instruction that was only implemented for Arm64
---
 ARMeilleure/Instructions/InstEmitSimdArithmetic.cs | 1494 ++++++++++++++++----
 .../Instructions/InstEmitSimdArithmetic32.cs       |  229 ++-
 ARMeilleure/Instructions/InstEmitSimdCmp.cs        |   18 +-
 ARMeilleure/Instructions/InstEmitSimdCmp32.cs      |   66 +-
 ARMeilleure/Instructions/InstEmitSimdCvt.cs        |  294 +++-
 ARMeilleure/Instructions/InstEmitSimdCvt32.cs      |  103 +-
 .../Instructions/InstEmitSimdHelper32Arm64.cs      |  366 +++++
 .../Instructions/InstEmitSimdHelperArm64.cs        |  720 ++++++++++
 ARMeilleure/Instructions/InstEmitSimdLogical.cs    |   54 +-
 ARMeilleure/Instructions/InstEmitSimdLogical32.cs  |   54 +-
 ARMeilleure/Instructions/InstEmitSimdMove32.cs     |   58 +-
 ARMeilleure/Instructions/InstEmitSimdShift.cs      |  517 ++++++-
 ARMeilleure/Instructions/InstEmitSystem.cs         |    4 +
 13 files changed, 3517 insertions(+), 460 deletions(-)
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
(limited to 'ARMeilleure/Instructions')

diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index b91c522e..3e65db23 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -21,22 +21,47 @@ namespace ARMeilleure.Instructions
     {
         public static void Abs_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS);
+            }
+            else
+            {
+                EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }

         public static void Abs_V(ArmEmitterContext context)
         {
-            EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV);
+            }
+            else
+            {
+                EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }

         public static void Add_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS);
+            }
+            else
+            {
+                EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }

         public static void Add_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@@ -62,24 +87,42 @@ namespace ARMeilleure.Instructions

         public static void Addhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+            }
         }

         public static void Addp_S(ArmEmitterContext context)
         {
-            OpCodeSimd op =
(OpCodeSimd)context.CurrOp; + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; - Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size); - Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size); + Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size); - Operand res = context.Add(ne0, ne1); + Operand res = context.Add(ne0, ne1); - context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); + } } public static void Addp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PaddInstruction); } @@ -91,68 +134,89 @@ namespace ARMeilleure.Instructions public static void Addv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } } public static void Cls_V(ArmEmitterContext context) { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; - Operand res = context.VectorZero(); + Operand res = context.VectorZero(); - int elems = op.GetBytesCount() >> op.Size; + int elems = op.GetBytesCount() >> op.Size; - int eSize = 8 << op.Size; + int eSize = 8 << op.Size; - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); - Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize)); + Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize)); - res = EmitVectorInsert(context, res, de, index, op.Size); - } + res = EmitVectorInsert(context, res, de, index, op.Size); + } - context.Copy(GetVec(op.Rd), res); + context.Copy(GetVec(op.Rd), res); + } } public static void Clz_V(ArmEmitterContext context) { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; - - int eSize = 8 << op.Size; - - Operand res = eSize switch { - 8 => Clz_V_I8 (context, GetVec(op.Rn)), - 16 => Clz_V_I16(context, GetVec(op.Rn)), - 32 => Clz_V_I32(context, GetVec(op.Rn)), - _ => default - }; - - if (res != default) + if (Optimizations.UseAdvSimd) { - if (op.RegisterSize == RegisterSize.Simd64) - { - res = context.VectorZeroUpper64(res); - } + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV); } else { - int elems = op.GetBytesCount() >> op.Size; + OpCodeSimd op = (OpCodeSimd)context.CurrOp; - res = context.VectorZero(); + int eSize = 8 << op.Size; - for (int index = 0; index < elems; index++) + Operand res = eSize switch { + 8 => Clz_V_I8 (context, GetVec(op.Rn)), + 16 => Clz_V_I16(context, GetVec(op.Rn)), + 32 => Clz_V_I32(context, GetVec(op.Rn)), + _ => default + }; + + if (res != default) { - 
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + } + else + { + int elems = op.GetBytesCount() >> op.Size; - Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize)); + res = context.VectorZero(); - res = EmitVectorInsert(context, res, de, index, op.Size); + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize)); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } } - } - context.Copy(GetVec(op.Rd), res); + context.Copy(GetVec(op.Rd), res); + } } private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg) @@ -271,36 +335,47 @@ namespace ARMeilleure.Instructions public static void Cnt_V(ArmEmitterContext context) { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; - Operand res = context.VectorZero(); + Operand res = context.VectorZero(); - int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8; + int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8; - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); - Operand de; + Operand de; - if (Optimizations.UsePopCnt) - { - de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne); - } - else - { - de = EmitCountSetBits8(context, ne); + if (Optimizations.UsePopCnt) + { + de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne); + } + else + { + de = EmitCountSetBits8(context, ne); + } + + res = EmitVectorInsert(context, res, de, index, 0); } - res = EmitVectorInsert(context, res, de, index, 0); + context.Copy(GetVec(op.Rd), res); } - - context.Copy(GetVec(op.Rd), res); } public static void Fabd_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -336,7 +411,11 @@ namespace ARMeilleure.Instructions public static void Fabd_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -377,7 +456,11 @@ namespace ARMeilleure.Instructions public static void Fabs_S(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -405,7 +488,11 @@ namespace ARMeilleure.Instructions public static void Fabs_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = 
(OpCodeSimd)context.CurrOp; @@ -440,7 +527,11 @@ namespace ARMeilleure.Instructions public static void Fadd_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd); } @@ -459,7 +550,11 @@ namespace ARMeilleure.Instructions public static void Fadd_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); } @@ -478,7 +573,11 @@ namespace ARMeilleure.Instructions public static void Faddp_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse3) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -506,7 +605,11 @@ namespace ARMeilleure.Instructions public static void Faddp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -534,7 +637,11 @@ namespace ARMeilleure.Instructions public static void Fdiv_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); } @@ -553,7 +660,11 @@ namespace ARMeilleure.Instructions public static void Fdiv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd); } @@ -572,7 +683,11 @@ namespace ARMeilleure.Instructions public static void Fmadd_S(ArmEmitterContext context) // Fused. 
{ - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -607,7 +722,11 @@ namespace ARMeilleure.Instructions public static void Fmax_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41ProcessNaNsOpF(context, (op1, op2) => { @@ -628,7 +747,11 @@ namespace ARMeilleure.Instructions public static void Fmax_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41ProcessNaNsOpF(context, (op1, op2) => { @@ -649,7 +772,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnm_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true); } @@ -664,7 +791,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnm_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false); } @@ -679,7 +810,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnmp_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2ScalarPairwiseOpF(context, (op1, op2) => { @@ -697,7 +832,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnmp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -715,7 +854,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnmv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => { @@ -733,7 +876,11 @@ namespace ARMeilleure.Instructions public static void Fmaxp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -757,7 +904,11 @@ namespace ARMeilleure.Instructions 
public static void Fmaxv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => { @@ -781,7 +932,11 @@ namespace ARMeilleure.Instructions public static void Fmin_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41ProcessNaNsOpF(context, (op1, op2) => { @@ -802,7 +957,11 @@ namespace ARMeilleure.Instructions public static void Fmin_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41ProcessNaNsOpF(context, (op1, op2) => { @@ -823,7 +982,11 @@ namespace ARMeilleure.Instructions public static void Fminnm_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true); } @@ -838,7 +1001,11 @@ namespace ARMeilleure.Instructions public static void Fminnm_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false); } @@ -853,7 +1020,11 @@ namespace ARMeilleure.Instructions public static void Fminnmp_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2ScalarPairwiseOpF(context, (op1, op2) => { @@ -871,7 +1042,11 @@ namespace ARMeilleure.Instructions public static void Fminnmp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -889,7 +1064,11 @@ namespace ARMeilleure.Instructions public static void Fminnmv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => { @@ -907,7 +1086,11 @@ namespace ARMeilleure.Instructions public static void Fminp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, 
op2) => { @@ -931,7 +1114,11 @@ namespace ARMeilleure.Instructions public static void Fminv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => { @@ -955,15 +1142,26 @@ namespace ARMeilleure.Instructions public static void Fmla_Se(ArmEmitterContext context) // Fused. { - EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe); + } + else + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } } public static void Fmla_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1006,7 +1204,11 @@ namespace ARMeilleure.Instructions public static void Fmla_Ve(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; @@ -1055,15 +1257,26 @@ namespace ARMeilleure.Instructions public static void Fmls_Se(ArmEmitterContext context) // Fused. { - EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe); + } + else + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } } public static void Fmls_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1106,7 +1319,11 @@ namespace ARMeilleure.Instructions public static void Fmls_Ve(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; @@ -1155,7 +1372,11 @@ namespace ARMeilleure.Instructions public static void Fmsub_S(ArmEmitterContext context) // Fused. 
{ - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1190,7 +1411,11 @@ namespace ARMeilleure.Instructions public static void Fmul_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); } @@ -1209,12 +1434,23 @@ namespace ARMeilleure.Instructions public static void Fmul_Se(ArmEmitterContext context) { - EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe); + } + else + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Fmul_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); } @@ -1233,7 +1469,11 @@ namespace ARMeilleure.Instructions public static void Fmul_Ve(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; @@ -1283,39 +1523,71 @@ namespace ARMeilleure.Instructions public static void Fmulx_S(ArmEmitterContext context) { - EmitScalarBinaryOpF(context, (op1, op2) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); - }); + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } } public static void Fmulx_Se(ArmEmitterContext context) { - EmitScalarBinaryOpByElemF(context, (op1, op2) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); - }); + InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe); + } + else + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } } public static void Fmulx_V(ArmEmitterContext context) { - EmitVectorBinaryOpF(context, (op1, op2) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); - }); + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } } public static void Fmulx_Ve(ArmEmitterContext context) { - EmitVectorBinaryOpByElemF(context, (op1, op2) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); - }); + 
InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe); + } + else + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } } public static void Fneg_S(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -1344,7 +1616,11 @@ namespace ARMeilleure.Instructions public static void Fneg_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -1380,7 +1656,11 @@ namespace ARMeilleure.Instructions public static void Fnmadd_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1423,7 +1703,11 @@ namespace ARMeilleure.Instructions public static void Fnmsub_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1466,7 +1750,14 @@ namespace ARMeilleure.Instructions public static void Fnmul_S(ArmEmitterContext context) { - EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } } public static void Frecpe_S(ArmEmitterContext context) @@ -1475,7 +1766,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true); @@ -1496,7 +1791,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false); @@ -1518,9 +1817,13 @@ namespace ARMeilleure.Instructions public static void Frecps_S(ArmEmitterContext context) // Fused. 
{ - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) { - OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -1561,7 +1864,11 @@ namespace ARMeilleure.Instructions public static void Frecps_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1609,15 +1916,26 @@ namespace ARMeilleure.Instructions public static void Frecpx_S(ArmEmitterContext context) { - EmitScalarUnaryOpF(context, (op1) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1); - }); + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpxS); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1); + }); + } } public static void Frinta_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway); } @@ -1632,7 +1950,11 @@ namespace ARMeilleure.Instructions public static void Frinta_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway); } @@ -1647,23 +1969,41 @@ namespace ARMeilleure.Instructions public static void Frinti_S(ArmEmitterContext context) { - EmitScalarUnaryOpF(context, (op1) => + if (Optimizations.UseAdvSimd) { - return EmitRoundByRMode(context, op1); - }); + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } } public static void Frinti_V(ArmEmitterContext context) { - EmitVectorUnaryOpF(context, (op1) => + if (Optimizations.UseAdvSimd) { - return EmitRoundByRMode(context, op1); - }); + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } } public static void Frintm_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); } @@ -1678,7 +2018,11 @@ namespace ARMeilleure.Instructions public static void Frintm_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); } @@ -1693,7 +2037,11 @@ namespace ARMeilleure.Instructions public static void Frintn_S(ArmEmitterContext 
context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest); } @@ -1708,7 +2056,11 @@ namespace ARMeilleure.Instructions public static void Frintn_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintnV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest); } @@ -1723,7 +2075,11 @@ namespace ARMeilleure.Instructions public static void Frintp_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); } @@ -1738,7 +2094,11 @@ namespace ARMeilleure.Instructions public static void Frintp_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); } @@ -1753,6 +2113,7 @@ namespace ARMeilleure.Instructions public static void Frintx_S(ArmEmitterContext context) { + // TODO Arm64: Fast path. Should we set host FPCR? EmitScalarUnaryOpF(context, (op1) => { return EmitRoundByRMode(context, op1); @@ -1761,6 +2122,7 @@ namespace ARMeilleure.Instructions public static void Frintx_V(ArmEmitterContext context) { + // TODO Arm64: Fast path. Should we set host FPCR? 
EmitVectorUnaryOpF(context, (op1) => { return EmitRoundByRMode(context, op1); @@ -1769,7 +2131,11 @@ namespace ARMeilleure.Instructions public static void Frintz_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero); } @@ -1784,7 +2150,11 @@ namespace ARMeilleure.Instructions public static void Frintz_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero); } @@ -1803,7 +2173,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true); @@ -1824,7 +2198,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false); @@ -1846,7 +2224,11 @@ namespace ARMeilleure.Instructions public static void Frsqrts_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1895,7 +2277,11 @@ namespace ARMeilleure.Instructions public static void Frsqrts_V(ArmEmitterContext context) // Fused. 
{ - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1949,7 +2335,11 @@ namespace ARMeilleure.Instructions public static void Fsqrt_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); } @@ -1964,7 +2354,11 @@ namespace ARMeilleure.Instructions public static void Fsqrt_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd); } @@ -1979,7 +2373,11 @@ namespace ARMeilleure.Instructions public static void Fsub_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); } @@ -1998,7 +2396,11 @@ namespace ARMeilleure.Instructions public static void Fsub_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FsubV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); } @@ -2017,7 +2419,11 @@ namespace ARMeilleure.Instructions public static void Mla_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorMul_AddSub(context, AddSub.Add); } @@ -2032,15 +2438,26 @@ namespace ARMeilleure.Instructions public static void Mla_Ve(ArmEmitterContext context) { - EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe); + } + else + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } } public static void Mls_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorMul_AddSub(context, AddSub.Subtract); } @@ -2055,15 +2472,26 @@ namespace ARMeilleure.Instructions public static void Mls_Ve(ArmEmitterContext context) { - EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe); + } + else + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return 
context.Subtract(op1, context.Multiply(op2, op3)); + }); + } } public static void Mul_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorMul_AddSub(context, AddSub.None); } @@ -2075,17 +2503,35 @@ namespace ARMeilleure.Instructions public static void Mul_Ve(ArmEmitterContext context) { - EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe); + } + else + { + EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Neg_S(ArmEmitterContext context) { - EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS); + } + else + { + EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); + } } public static void Neg_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -2110,7 +2556,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UsePclmulqdq && op.Size == 3) + if (Optimizations.UseAdvSimd && false) // Not supported by all Arm CPUs. + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV); + } + else if (Optimizations.UsePclmulqdq && op.Size == 3) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2214,33 +2664,65 @@ namespace ARMeilleure.Instructions public static void Raddhn_V(ArmEmitterContext context) { - EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); + } } public static void Rsubhn_V(ArmEmitterContext context) { - EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); + } } public static void Saba_V(ArmEmitterContext context) { - EmitVectorTernaryOpSx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV); + } + else + { + EmitVectorTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } } public static void Sabal_V(ArmEmitterContext context) { - EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } } public static void Sabd_V(ArmEmitterContext context) { - if 
(Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2262,7 +2744,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2293,12 +2779,23 @@ namespace ARMeilleure.Instructions public static void Sadalp_V(ArmEmitterContext context) { - EmitAddLongPairwise(context, signed: true, accumulate: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV); + } + else + { + EmitAddLongPairwise(context, signed: true, accumulate: true); + } } public static void Saddl_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2328,17 +2825,35 @@ namespace ARMeilleure.Instructions public static void Saddlp_V(ArmEmitterContext context) { - EmitAddLongPairwise(context, signed: true, accumulate: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV); + } + else + { + EmitAddLongPairwise(context, signed: true, accumulate: false); + } } public static void Saddlv_V(ArmEmitterContext context) { - EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV); + } + else + { + EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); + } } public static void Saddw_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2368,7 +2883,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV); + } + else if (Optimizations.UseSse2 && op.Size > 0) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2404,7 +2923,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV); + } + else if (Optimizations.UseSse2 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2442,7 +2965,11 @@ namespace ARMeilleure.Instructions public static void Smax_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2468,7 +2995,11 @@ namespace ARMeilleure.Instructions public static void Smaxp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if 
(Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction); } @@ -2480,12 +3011,23 @@ namespace ARMeilleure.Instructions public static void Smaxv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV); + } + else + { + EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); + } } public static void Smin_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2511,7 +3053,11 @@ namespace ARMeilleure.Instructions public static void Sminp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PminsInstruction); } @@ -2523,14 +3069,25 @@ namespace ARMeilleure.Instructions public static void Sminv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV); + } + else + { + EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); + } } public static void Smlal_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -2566,17 +3123,28 @@ namespace ARMeilleure.Instructions public static void Smlal_Ve(ArmEmitterContext context) { - EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe); + } + else + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } } public static void Smlsl_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -2612,117 +3180,268 @@ namespace ARMeilleure.Instructions public static void Smlsl_Ve(ArmEmitterContext context) { - EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe); + } + else + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } } public static 
void Smull_V(ArmEmitterContext context) { - EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Smull_Ve(ArmEmitterContext context) { - EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe); + } + else + { + EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Sqabs_S(ArmEmitterContext context) { - EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS); + } + else + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } } public static void Sqabs_V(ArmEmitterContext context) { - EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV); + } + else + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } } public static void Sqadd_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + } } public static void Sqadd_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + } } public static void Sqdmulh_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } } public static void Sqdmulh_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } } public static void Sqdmulh_Ve(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe); + } + else + { + EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } } public static void Sqneg_S(ArmEmitterContext context) { - EmitScalarSaturatingUnaryOpSx(context, 
(op1) => context.Negate(op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS); + } + else + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } } public static void Sqneg_V(ArmEmitterContext context) { - EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV); + } + else + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } } public static void Sqrdmulh_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } } public static void Sqrdmulh_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } } public static void Sqrdmulh_Ve(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe); + } + else + { + EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } } public static void Sqsub_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + } } public static void Sqsub_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + } } public static void Sqxtn_S(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + } } public static void Sqxtn_V(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + } } public static void Sqxtun_S(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, 
Intrinsic.Arm64SqxtunS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + } } public static void Sqxtun_V(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + } } public static void Srhadd_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV); + } + else if (Optimizations.UseSse2 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2764,7 +3483,11 @@ namespace ARMeilleure.Instructions public static void Ssubl_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2794,7 +3517,11 @@ namespace ARMeilleure.Instructions public static void Ssubw_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2822,12 +3549,23 @@ namespace ARMeilleure.Instructions public static void Sub_S(ArmEmitterContext context) { - EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS); + } + else + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } } public static void Sub_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2853,38 +3591,77 @@ namespace ARMeilleure.Instructions public static void Subhn_V(ArmEmitterContext context) { - EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); + } } public static void Suqadd_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + } } public static void Suqadd_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + } } public static void Uaba_V(ArmEmitterContext context) { - EmitVectorTernaryOpZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, EmitAbs(context, 
context.Subtract(op2, op3))); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } } public static void Uabal_V(ArmEmitterContext context) { - EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } } public static void Uabd_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2906,7 +3683,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2937,12 +3718,23 @@ namespace ARMeilleure.Instructions public static void Uadalp_V(ArmEmitterContext context) { - EmitAddLongPairwise(context, signed: false, accumulate: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV); + } + else + { + EmitAddLongPairwise(context, signed: false, accumulate: true); + } } public static void Uaddl_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2972,17 +3764,35 @@ namespace ARMeilleure.Instructions public static void Uaddlp_V(ArmEmitterContext context) { - EmitAddLongPairwise(context, signed: false, accumulate: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV); + } + else + { + EmitAddLongPairwise(context, signed: false, accumulate: false); + } } public static void Uaddlv_V(ArmEmitterContext context) { - EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV); + } + else + { + EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } } public static void Uaddw_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -3012,7 +3822,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV); + } + else if (Optimizations.UseSse2 && op.Size > 0) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -3048,7 +3862,11 @@ namespace ARMeilleure.Instructions { 
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV); + } + else if (Optimizations.UseSse2 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -3079,7 +3897,11 @@ namespace ARMeilleure.Instructions public static void Umax_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -3105,7 +3927,11 @@ namespace ARMeilleure.Instructions public static void Umaxp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction); } @@ -3117,12 +3943,23 @@ namespace ARMeilleure.Instructions public static void Umaxv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); + } } public static void Umin_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -3148,7 +3985,11 @@ namespace ARMeilleure.Instructions public static void Uminp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PminuInstruction); } @@ -3160,14 +4001,25 @@ namespace ARMeilleure.Instructions public static void Uminv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); + } } public static void Umlal_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -3203,17 +4055,28 @@ namespace ARMeilleure.Instructions public static void Umlal_Ve(ArmEmitterContext context) { - EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe); + } + else + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } } public static void Umlsl_V(ArmEmitterContext context) { OpCodeSimdReg op = 
(OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -3249,57 +4112,124 @@ namespace ARMeilleure.Instructions public static void Umlsl_Ve(ArmEmitterContext context) { - EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe); + } + else + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } } public static void Umull_V(ArmEmitterContext context) { - EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Umull_Ve(ArmEmitterContext context) { - EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe); + } + else + { + EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Uqadd_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } } public static void Uqadd_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } } public static void Uqsub_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } } public static void Uqsub_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } } public static void Uqxtn_S(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + } } public static void Uqxtn_V(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + } } 
public static void Urhadd_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV); + } + else if (Optimizations.UseSse2 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -3330,17 +4260,35 @@ namespace ARMeilleure.Instructions public static void Usqadd_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } } public static void Usqadd_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } } public static void Usubl_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -3370,7 +4318,11 @@ namespace ARMeilleure.Instructions public static void Usubw_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index 79b376e9..a9994e41 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -2,6 +2,7 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Translation; using System; + using static ARMeilleure.Instructions.InstEmitFlowHelper; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper; @@ -30,7 +31,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarUnaryOpSimd32(context, (m) => { @@ -49,7 +54,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -76,7 +85,11 @@ namespace ARMeilleure.Instructions public static void Vadd_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd); } @@ -92,7 +105,11 @@ 
namespace ARMeilleure.Instructions public static void Vadd_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); } @@ -280,7 +297,11 @@ namespace ARMeilleure.Instructions public static void Vfma_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd); } @@ -299,7 +320,11 @@ namespace ARMeilleure.Instructions public static void Vfma_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps); } @@ -314,7 +339,11 @@ namespace ARMeilleure.Instructions public static void Vfms_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd); } @@ -333,7 +362,11 @@ namespace ARMeilleure.Instructions public static void Vfms_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps); } @@ -348,7 +381,11 @@ namespace ARMeilleure.Instructions public static void Vfnma_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd); } @@ -367,7 +404,11 @@ namespace ARMeilleure.Instructions public static void Vfnms_S(ArmEmitterContext context) // Fused. 
{ - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd); } @@ -419,7 +460,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; - if (Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS); + } + else if (Optimizations.UseSse2) { EmitScalarUnaryOpSimd32(context, (m) => { @@ -445,7 +490,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; - if (Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS); + } + else if (Optimizations.UseSse2) { EmitScalarBinaryOpSimd32(context, (n, m) => { @@ -473,7 +522,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true); } @@ -498,7 +551,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true); } @@ -525,7 +582,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -554,7 +615,11 @@ namespace ARMeilleure.Instructions public static void Vdiv_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); } @@ -573,7 +638,11 @@ namespace ARMeilleure.Instructions public static void Vmaxnm_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF32(context, true, true); } @@ -585,7 +654,11 @@ namespace ARMeilleure.Instructions public static void Vmaxnm_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { 
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF32(context, true, false); } @@ -597,7 +670,11 @@ namespace ARMeilleure.Instructions public static void Vminnm_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF32(context, false, true); } @@ -609,7 +686,11 @@ namespace ARMeilleure.Instructions public static void Vminnm_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF32(context, false, false); } @@ -621,7 +702,11 @@ namespace ARMeilleure.Instructions public static void Vmax_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd); } @@ -664,7 +749,11 @@ namespace ARMeilleure.Instructions public static void Vmin_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd); } @@ -707,7 +796,11 @@ namespace ARMeilleure.Instructions public static void Vmla_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd); } @@ -730,7 +823,11 @@ namespace ARMeilleure.Instructions public static void Vmla_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd); } @@ -786,7 +883,11 @@ namespace ARMeilleure.Instructions public static void Vmls_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd); } @@ -809,7 +910,11 @@ namespace ARMeilleure.Instructions public static void Vmls_V(ArmEmitterContext context) { - if (Optimizations.FastFP && 
Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd); } @@ -865,7 +970,11 @@ namespace ARMeilleure.Instructions public static void Vmul_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); } @@ -884,7 +993,11 @@ namespace ARMeilleure.Instructions public static void Vmul_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); } @@ -975,7 +1088,11 @@ namespace ARMeilleure.Instructions public static void Vpadd_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps); } @@ -1008,7 +1125,11 @@ namespace ARMeilleure.Instructions public static void Vpmax_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps); } @@ -1038,7 +1159,11 @@ namespace ARMeilleure.Instructions public static void Vpmin_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps); } @@ -1217,7 +1342,11 @@ namespace ARMeilleure.Instructions { int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) { EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0); } @@ -1237,7 +1366,11 @@ namespace ARMeilleure.Instructions public static void Vrecps(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; bool single = (op.Size & 1) == 0; @@ -1304,7 +1437,11 @@ namespace ARMeilleure.Instructions { int sizeF = op.Size & 1; - if (Optimizations.FastFP && 
Optimizations.UseSse2 && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) { EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0); } @@ -1324,7 +1461,11 @@ namespace ARMeilleure.Instructions public static void Vrsqrts(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; bool single = (op.Size & 1) == 0; @@ -1393,7 +1534,11 @@ namespace ARMeilleure.Instructions public static void Vsqrt_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); } @@ -1408,7 +1553,11 @@ namespace ARMeilleure.Instructions public static void Vsub_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); } @@ -1420,7 +1569,11 @@ namespace ARMeilleure.Instructions public static void Vsub_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); } diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs index 71055155..c32b64ba 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -466,12 +466,26 @@ namespace ARMeilleure.Instructions public static void Fcmp_S(ArmEmitterContext context) { - EmitFcmpOrFcmpe(context, signalNaNs: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false); + } + else + { + EmitFcmpOrFcmpe(context, signalNaNs: false); + } } public static void Fcmpe_S(ArmEmitterContext context) { - EmitFcmpOrFcmpe(context, signalNaNs: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true); + } + else + { + EmitFcmpOrFcmpe(context, signalNaNs: true); + } } private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs) diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs index 339d3293..a990e057 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs @@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions { public static void Vceq_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + 
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false); } @@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true); } @@ -55,7 +63,11 @@ namespace ARMeilleure.Instructions public static void Vcge_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseAvx) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false); } @@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseAvx) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true); } @@ -95,7 +111,11 @@ namespace ARMeilleure.Instructions public static void Vcgt_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseAvx) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false); } @@ -118,7 +138,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseAvx) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true); } @@ -139,7 +163,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true); } @@ -160,7 +188,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true); } @@ -247,12 +279,26 @@ namespace ARMeilleure.Instructions public static void Vcmp(ArmEmitterContext context) { - EmitVcmpOrVcmpe(context, false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false); + } + else + { + EmitVcmpOrVcmpe(context, false); + } } public static void Vcmpe(ArmEmitterContext context) { - EmitVcmpOrVcmpe(context, true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true); + } + else + { + 
EmitVcmpOrVcmpe(context, true); + } } private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs index 7f61cad4..652ad397 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs @@ -164,7 +164,11 @@ namespace ARMeilleure.Instructions public static void Fcvtas_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false); } @@ -176,7 +180,11 @@ namespace ARMeilleure.Instructions public static void Fcvtas_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true); } @@ -188,7 +196,11 @@ namespace ARMeilleure.Instructions public static void Fcvtas_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false); } @@ -200,7 +212,11 @@ namespace ARMeilleure.Instructions public static void Fcvtau_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false); } @@ -212,7 +228,11 @@ namespace ARMeilleure.Instructions public static void Fcvtau_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true); } @@ -224,7 +244,11 @@ namespace ARMeilleure.Instructions public static void Fcvtau_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false); } @@ -240,7 +264,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.UseSse2 && sizeF == 1) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV); + } + else if (Optimizations.UseSse2 && sizeF == 1) { Operand n = GetVec(op.Rn); @@ -296,7 +324,11 @@ namespace ARMeilleure.Instructions public static void Fcvtms_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false); } @@ -308,7 +340,11 @@ namespace ARMeilleure.Instructions public static void Fcvtms_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, 
Intrinsic.Arm64FcvtmsV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false); } @@ -320,7 +356,11 @@ namespace ARMeilleure.Instructions public static void Fcvtmu_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false); } @@ -336,7 +376,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.UseSse2 && sizeF == 1) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV); + } + else if (Optimizations.UseSse2 && sizeF == 1) { Operand d = GetVec(op.Rd); @@ -405,7 +449,11 @@ namespace ARMeilleure.Instructions public static void Fcvtns_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false); } @@ -417,7 +465,11 @@ namespace ARMeilleure.Instructions public static void Fcvtns_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true); } @@ -429,7 +481,11 @@ namespace ARMeilleure.Instructions public static void Fcvtns_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false); } @@ -441,7 +497,11 @@ namespace ARMeilleure.Instructions public static void Fcvtnu_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true); } @@ -453,7 +513,11 @@ namespace ARMeilleure.Instructions public static void Fcvtnu_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false); } @@ -465,7 +529,11 @@ namespace ARMeilleure.Instructions public static void Fcvtps_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false); } @@ -477,7 +545,11 @@ namespace ARMeilleure.Instructions public static void Fcvtpu_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false); } @@ -489,7 +561,11 @@ namespace ARMeilleure.Instructions public 
static void Fcvtzs_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false); } @@ -501,7 +577,13 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true); } @@ -513,7 +595,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true); } @@ -525,7 +611,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false); } @@ -537,7 +627,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_V_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false); } @@ -549,7 +643,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false); } @@ -561,7 +659,13 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true); } @@ -573,7 +677,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true); } @@ -585,7 +693,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false); } @@ -597,7 +709,11 @@ namespace 
ARMeilleure.Instructions public static void Fcvtzu_V_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false); } @@ -609,41 +725,59 @@ namespace ARMeilleure.Instructions public static void Scvtf_Gp(ArmEmitterContext context) { - OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - - Operand res = GetIntOrZR(context, op.Rn); - - if (op.RegisterSize == RegisterSize.Int32) + if (Optimizations.UseAdvSimd) { - res = context.SignExtend32(OperandType.I64, res); + InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp); } + else + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } - res = EmitFPConvert(context, res, op.Size, signed: true); + res = EmitFPConvert(context, res, op.Size, signed: true); - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } public static void Scvtf_Gp_Fixed(ArmEmitterContext context) { OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - Operand res = GetIntOrZR(context, op.Rn); - - if (op.RegisterSize == RegisterSize.Int32) + if (Optimizations.UseAdvSimd) { - res = context.SignExtend32(OperandType.I64, res); + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits); } + else + { + Operand res = GetIntOrZR(context, op.Rn); - res = EmitFPConvert(context, res, op.Size, signed: true); + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } - res = EmitI2fFBitsMul(context, res, op.FBits); + res = EmitFPConvert(context, res, op.Size, signed: true); - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + res = EmitI2fFBitsMul(context, res, op.FBits); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } public static void Scvtf_S(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS); + } + else if (Optimizations.UseSse2) { EmitSse2ScvtfOp(context, scalar: true); } @@ -655,7 +789,11 @@ namespace ARMeilleure.Instructions public static void Scvtf_S_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) { EmitSse2ScvtfOp(context, scalar: true); } @@ -667,7 +805,11 @@ namespace ARMeilleure.Instructions public static void Scvtf_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV); + } + else if (Optimizations.UseSse2) { EmitSse2ScvtfOp(context, scalar: false); } @@ -679,7 +821,11 @@ namespace ARMeilleure.Instructions public static void Scvtf_V_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, 
GetFBits(context)); + } + else if (Optimizations.UseSse2) { EmitSse2ScvtfOp(context, scalar: false); } @@ -691,31 +837,49 @@ namespace ARMeilleure.Instructions public static void Ucvtf_Gp(ArmEmitterContext context) { - OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp); + } + else + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - Operand res = GetIntOrZR(context, op.Rn); + Operand res = GetIntOrZR(context, op.Rn); - res = EmitFPConvert(context, res, op.Size, signed: false); + res = EmitFPConvert(context, res, op.Size, signed: false); - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } public static void Ucvtf_Gp_Fixed(ArmEmitterContext context) { OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - Operand res = GetIntOrZR(context, op.Rn); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits); + } + else + { + Operand res = GetIntOrZR(context, op.Rn); - res = EmitFPConvert(context, res, op.Size, signed: false); + res = EmitFPConvert(context, res, op.Size, signed: false); - res = EmitI2fFBitsMul(context, res, op.FBits); + res = EmitI2fFBitsMul(context, res, op.FBits); - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } public static void Ucvtf_S(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS); + } + else if (Optimizations.UseSse2) { EmitSse2UcvtfOp(context, scalar: true); } @@ -727,7 +891,11 @@ namespace ARMeilleure.Instructions public static void Ucvtf_S_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) { EmitSse2UcvtfOp(context, scalar: true); } @@ -739,7 +907,11 @@ namespace ARMeilleure.Instructions public static void Ucvtf_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV); + } + else if (Optimizations.UseSse2) { EmitSse2UcvtfOp(context, scalar: false); } @@ -751,7 +923,11 @@ namespace ARMeilleure.Instructions public static void Ucvtf_V_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) { EmitSse2UcvtfOp(context, scalar: false); } diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index f3f23958..5fdc3b5a 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -59,7 +59,11 @@ namespace ARMeilleure.Instructions if (toInteger) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? 
Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV); + } + else if (Optimizations.UseSse41) { EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned); } @@ -153,7 +157,28 @@ namespace ARMeilleure.Instructions bool unsigned = (op.Opc2 & 1) == 0; bool roundWithFpscr = op.Opc != 1; - if (!roundWithFpscr && Optimizations.UseSse41) + if (!roundWithFpscr && Optimizations.UseAdvSimd) + { + bool doubleSize = floatSize == OperandType.FP64; + + if (doubleSize) + { + Operand m = GetVecA32(op.Vm >> 1); + + Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize); + + Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble; + + Operand asInteger = context.AddIntrinsicInt(inst, toConvert); + + InsertScalar(context, op.Vd, asInteger); + } + else + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS); + } + } + else if (!roundWithFpscr && Optimizations.UseSse41) { EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned); } @@ -231,7 +256,34 @@ namespace ARMeilleure.Instructions bool unsigned = op.Opc == 0; int rm = op.Opc2 & 3; - if (Optimizations.UseSse41) + Intrinsic inst; + + if (Optimizations.UseAdvSimd) + { + if (unsigned) + { + inst = rm switch { + 0b00 => Intrinsic.Arm64FcvtauS, + 0b01 => Intrinsic.Arm64FcvtnuS, + 0b10 => Intrinsic.Arm64FcvtpuS, + 0b11 => Intrinsic.Arm64FcvtmuS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + } + else + { + inst = rm switch { + 0b00 => Intrinsic.Arm64FcvtasS, + 0b01 => Intrinsic.Arm64FcvtnsS, + 0b10 => Intrinsic.Arm64FcvtpsS, + 0b11 => Intrinsic.Arm64FcvtmsS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + } + + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst); + } + else if (Optimizations.UseSse41) { EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned); } @@ -338,7 +390,19 @@ namespace ARMeilleure.Instructions int rm = op.Opc2 & 3; - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + Intrinsic inst = rm switch { + 0b00 => Intrinsic.Arm64FrintaS, + 0b01 => Intrinsic.Arm64FrintnS, + 0b10 => Intrinsic.Arm64FrintpS, + 0b11 => Intrinsic.Arm64FrintmS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst); + } + else if (Optimizations.UseSse41) { EmitScalarUnaryOpSimd32(context, (m) => { @@ -382,12 +446,9 @@ namespace ARMeilleure.Instructions // VRINTA (vector). public static void Vrinta_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) { - EmitVectorUnaryOpSimd32(context, (m) => - { - return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false); - }); + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS); } else { @@ -398,7 +459,11 @@ namespace ARMeilleure.Instructions // VRINTM (vector). public static void Vrintm_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS); + } + else if (Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -414,7 +479,11 @@ namespace ARMeilleure.Instructions // VRINTN (vector). 
public static void Vrintn_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS); + } + else if (Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -430,7 +499,11 @@ namespace ARMeilleure.Instructions // VRINTP (vector). public static void Vrintp_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS); + } + else if (Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -448,7 +521,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS); + } + else if (Optimizations.UseSse2) { EmitScalarUnaryOpSimd32(context, (m) => { diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs new file mode 100644 index 00000000..98236be6 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs @@ -0,0 +1,366 @@ + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper32Arm64 + { + // Intrinsic Helpers + + public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV) + { + Debug.Assert(input.Type == OperandType.V128); + + int originalSide = originalV & 1; + int targetSide = targetV & 1; + + if (originalSide == targetSide) + { + return input; + } + + Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128; + + if (targetSide == 1) + { + return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high. + } + else + { + return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low. + } + } + + public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV) + { + Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128); + + int targetSide = targetV & 1; + Operand idx = Const(targetSide); + + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx); + } + + public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth) + { + Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128); + + // Insert from index 0 in value to index in target. + int index = reg & (doubleWidth ? 
1 : 3); + + if (doubleWidth) + { + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0)); + } + else + { + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0)); + } + } + + public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth) + { + int index = reg & (doubleWidth ? 1 : 3); + if (index == 0) return target; // Element is already at index 0, so just return the vector directly. + + if (doubleWidth) + { + return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1). + } + else + { + return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index. + } + } + + // Vector Operand Templates + + public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd); + } + + Operand res = vectorFunc(m); + + if (!op.Q) // Register insert. + { + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m)); + } + + public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + + if (side == -1) + { + side = op.Vd; + } + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, side); + m = EmitMoveDoubleWordToSide(context, m, op.Vm, side); + } + + Operand res = vectorFunc(n, m); + + if (!op.Q) // Register insert. + { + if (side != op.Vd) + { + res = EmitMoveDoubleWordToSide(context, res, side, op.Vd); + } + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + Operand initialD = d; + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd); + m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd); + } + + Operand res = vectorFunc(d, n, m); + + if (!op.Q) // Register insert. 
+ { + res = EmitDoubleWordInsert(context, initialD, res, op.Vd); + } + + context.Copy(initialD, res); + } + + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand res = scalarFunc(m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, d, res, op.Vd, doubleSize); + + context.Copy(d, res); + } + + public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m)); + } + + public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vn >> shift); + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + + n = EmitExtractScalar(context, n, op.Vn, doubleSize); + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand res = scalarFunc(n, m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, d, res, op.Vd, doubleSize); + + context.Copy(d, res); + } + + public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vn >> shift); + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + Operand initialD = d; + + n = EmitExtractScalar(context, n, op.Vn, doubleSize); + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + d = EmitExtractScalar(context, d, op.Vd, doubleSize); + + Operand res = scalarFunc(d, n, m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize); + + context.Copy(initialD, res); + } + + public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? 
Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m)); + } + + // Pairwise + + public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat; + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0); + } + + public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool cmpWithZero = (op.Opc & 2) != 0; + + Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS; + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vd >> shift); + Operand m = GetVecA32(op.Vm >> shift); + + n = EmitExtractScalar(context, n, op.Vd, doubleSize); + m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand nzcv = context.AddIntrinsicInt(inst, n, m); + + Operand one = Const(1); + + SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one)); + SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one)); + SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one)); + SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one)); + } + + public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int sizeF = op.Size & 1; + + Intrinsic inst; + if (zero) + { + inst = cond switch + { + CmpCondition.Equal => Intrinsic.Arm64FcmeqVz, + CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz, + CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz, + CmpCondition.LessThan => Intrinsic.Arm64FcmltVz, + CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz, + _ => throw new InvalidOperationException() + }; + } + else { + inst = cond switch + { + CmpCondition.Equal => Intrinsic.Arm64FcmeqV, + CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV, + CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV, + _ => throw new InvalidOperationException() + }; + } + + inst |= (sizeF != 0 ? 
Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + + if (zero) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(inst, m); + }); + } + else + { + EmitVectorBinaryOpSimd32(context, (n, m) => + { + return context.AddIntrinsic(inst, n, m); + }); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs b/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs new file mode 100644 index 00000000..f0d242ae --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs @@ -0,0 +1,720 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitSimdHelperArm64 + { + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32 + ? 
context.AddIntrinsicInt (inst, n) + : context.AddIntrinsicLong(inst, n)); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + Operand a = GetVec(op.Ra); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m)); + } + + public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift))); + } + + public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + 
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + } + + public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, n, m); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, d, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32 + ? 
context.AddIntrinsicInt (inst, n, Const(fBits)) + : context.AddIntrinsicLong(inst, n, Const(fBits))); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), 
context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift))); + } + + public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + } + + public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= 
Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n, m); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, d, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index)); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp; + + Operand[] operands = new Operand[op.Size + 1]; + + operands[op.Size] = GetVec(op.Rm); + + for (int index = 0; index < op.Size; index++) + { + operands[index] = GetVec((op.Rn + index) & 0x1F); + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands)); + } + + public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false; + + Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS; + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + Operand n = GetVec(op.Rn); + Operand m = cmpWithZero ? 
Const(0) : GetVec(op.Rm); + + Operand nzcv = context.AddIntrinsicInt(inst, n, m); + + Operand one = Const(1); + + SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one)); + SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one)); + SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one)); + SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one)); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs index 624ae841..8ca81580 100644 --- a/ARMeilleure/Instructions/InstEmitSimdLogical.cs +++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs @@ -14,7 +14,11 @@ namespace ARMeilleure.Instructions { public static void And_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions public static void Bic_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -98,12 +106,26 @@ namespace ARMeilleure.Instructions public static void Bif_V(ArmEmitterContext context) { - EmitBifBit(context, notRm: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV); + } + else + { + EmitBifBit(context, notRm: true); + } } public static void Bit_V(ArmEmitterContext context) { - EmitBifBit(context, notRm: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV); + } + else + { + EmitBifBit(context, notRm: false); + } } private static void EmitBifBit(ArmEmitterContext context, bool notRm) @@ -167,7 +189,11 @@ namespace ARMeilleure.Instructions public static void Bsl_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -200,7 +226,11 @@ namespace ARMeilleure.Instructions public static void Eor_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -249,7 +279,11 @@ namespace ARMeilleure.Instructions public static void Orn_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -280,7 +314,11 @@ namespace ARMeilleure.Instructions public static void Orr_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; diff --git 
a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs index dd686d4d..c2a04778 100644 --- a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs @@ -13,7 +13,11 @@ namespace ARMeilleure.Instructions { public static void Vand_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m)); } @@ -25,7 +29,11 @@ namespace ARMeilleure.Instructions public static void Vbic_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n)); } @@ -73,17 +81,35 @@ namespace ARMeilleure.Instructions public static void Vbif(ArmEmitterContext context) { - EmitBifBit(context, true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m)); + } + else + { + EmitBifBit(context, true); + } } public static void Vbit(ArmEmitterContext context) { - EmitBifBit(context, false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m)); + } + else + { + EmitBifBit(context, false); + } } public static void Vbsl(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorTernaryOpSimd32(context, (d, n, m) => { @@ -105,7 +131,11 @@ namespace ARMeilleure.Instructions public static void Veor_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m)); } @@ -117,7 +147,11 @@ namespace ARMeilleure.Instructions public static void Vorn_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { Operand mask = context.VectorOne(); @@ -135,7 +169,11 @@ namespace ARMeilleure.Instructions public static void Vorr_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m)); } diff --git 
a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs index 7da180fc..17100eb9 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -392,7 +392,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V); + } + else if (Optimizations.UseSse2) { EmitVectorShuffleOpSimd32(context, (m, d) => { @@ -461,7 +465,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V); + } + else if (Optimizations.UseSsse3) { EmitVectorShuffleOpSimd32(context, (m, d) => { @@ -559,6 +567,52 @@ namespace ARMeilleure.Instructions } } + private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + bool overlap = op.Qm == op.Qd; + + Operand d = GetVecA32(op.Qd); + Operand m = GetVecA32(op.Qm); + + Operand dPart = d; + Operand mPart = m; + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0); + mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0); + } + + Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64; + + vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart); + Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart); + + if (!op.Q) // Register insert. 
+ { + resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0)); + + if (overlap) + { + resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0)); + } + else + { + resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0)); + } + } + + context.Copy(d, resD); + if (!overlap) + { + context.Copy(m, resM); + } + } + private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc) { OpCode32Simd op = (OpCode32Simd)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs index cf3b51bd..19e41119 100644 --- a/ARMeilleure/Instructions/InstEmitSimdShift.cs +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -26,7 +26,15 @@ namespace ARMeilleure.Instructions public static void Rshrn_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift); + } + else if (Optimizations.UseSsse3) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; @@ -80,7 +88,14 @@ namespace ARMeilleure.Instructions int shift = GetImmShl(op); - EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift); + } + else + { + EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } } public static void Shl_V(ArmEmitterContext context) @@ -90,7 +105,11 @@ namespace ARMeilleure.Instructions int shift = GetImmShl(op); int eSize = 8 << op.Size; - if (shift >= eSize) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift); + } + else if (shift >= eSize) { if ((op.RegisterSize == RegisterSize.Simd64)) { @@ -143,7 +162,11 @@ namespace ARMeilleure.Instructions int shift = 8 << op.Size; - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV); + } + else if (Optimizations.UseSse41) { Operand n = GetVec(op.Rn); @@ -170,7 +193,15 @@ namespace ARMeilleure.Instructions public static void Shrn_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift); + } + else if (Optimizations.UseSsse3) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; @@ -205,89 +236,259 @@ namespace ARMeilleure.Instructions public static void Sli_S(ArmEmitterContext context) { - EmitSli(context, scalar: true); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift); + } + else + { + EmitSli(context, scalar: true); + } } public static void Sli_V(ArmEmitterContext context) { - EmitSli(context, scalar: false); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + 
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift); + } + else + { + EmitSli(context, scalar: false); + } } public static void Sqrshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating); + } } public static void Sqrshrn_S(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } } public static void Sqrshrn_V(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } } public static void Sqrshrun_S(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } } public static void Sqrshrun_V(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } } public static void Sqshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating); + } } public static void Sqshrn_S(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } } public static void Sqshrn_V(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + 
InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } } public static void Sqshrun_S(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } } public static void Sqshrun_V(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } } public static void Sri_S(ArmEmitterContext context) { - EmitSri(context, scalar: true); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift); + } + else + { + EmitSri(context, scalar: true); + } } public static void Sri_V(ArmEmitterContext context) { - EmitSri(context, scalar: false); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift); + } + else + { + EmitSri(context, scalar: false); + } } public static void Srshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round); + } } public static void Srshr_S(ArmEmitterContext context) { - EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift); + } + else + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + } } public static void Srshr_V(ArmEmitterContext context) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { int shift = GetImmShr(op); int eSize = 8 << op.Size; @@ -325,14 +526,31 @@ namespace ARMeilleure.Instructions public static void Srsra_S(ArmEmitterContext context) { - EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift); + } + else + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } } public static 
void Srsra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -372,12 +590,26 @@ namespace ARMeilleure.Instructions
 
         public static void Sshl_S(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+            }
         }
 
         public static void Sshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed);
+            }
         }
 
         public static void Sshll_V(ArmEmitterContext context)
@@ -386,7 +618,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShl(op);
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
+            }
+            else if (Optimizations.UseSse41)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -416,7 +652,18 @@ namespace ARMeilleure.Instructions
 
         public static void Sshr_S(ArmEmitterContext context)
         {
-            EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
+            }
+            else
+            {
+                EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+            }
         }
 
         public static void Sshr_V(ArmEmitterContext context)
@@ -425,7 +672,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShr(op);
 
-            if (Optimizations.UseGfni && op.Size == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
+            }
+            else if (Optimizations.UseGfni && op.Size == 0)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -478,14 +729,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ssra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Ssra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
             {
                 int shift = GetImmShr(op);
 
@@ -515,49 +783,131 @@ namespace ARMeilleure.Instructions
 
         public static void Uqrshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+            }
         }
 
         public static void Uqrshrn_S(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            }
         }
 
         public static void Uqrshrn_V(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            }
         }
 
         public static void Uqshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Saturating);
+            }
         }
 
         public static void Uqshrn_S(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            }
         }
 
         public static void Uqshrn_V(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            }
         }
 
         public static void Urshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Round);
+            }
         }
 
         public static void Urshr_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+            }
         }
 
         public static void Urshr_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -593,14 +943,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ursra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Ursra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -638,12 +1005,26 @@ namespace ARMeilleure.Instructions
 
         public static void Ushl_S(ArmEmitterContext context)
        {
-            EmitShlRegOp(context, ShlRegFlags.Scalar);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Scalar);
+            }
         }
 
         public static void Ushl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.None);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.None);
+            }
         }
 
         public static void Ushll_V(ArmEmitterContext context)
@@ -652,7 +1033,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShl(op);
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
+            }
+            else if (Optimizations.UseSse41)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -682,14 +1067,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ushr_S(ArmEmitterContext context)
         {
-            EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
+            }
+            else
+            {
+                EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+            }
         }
 
         public static void Ushr_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
 
@@ -714,14 +1116,31 @@ namespace ARMeilleure.Instructions
 
         public static void Usra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Usra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
 
diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs
index cc32228c..1345bbf1 100644
--- a/ARMeilleure/Instructions/InstEmitSystem.cs
+++ b/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -150,6 +150,8 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSystem op = (OpCodeSystem)context.CurrOp;
 
+            context.SyncQcFlag();
+
             Operand fpsr = Const(0);
 
             for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
@@ -196,6 +198,8 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSystem op = (OpCodeSystem)context.CurrOp;
 
+            context.ClearQcFlagIfModified();
+
             Operand fpsr = GetIntOrZR(context, op.Rt);
             fpsr = context.ConvertI64ToI32(fpsr);
-- 
cgit v1.2.3-70-g09d2