diff options
author | sharmander <saldabain.dev@gmail.com> | 2020-12-14 22:01:52 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-15 00:01:52 -0300 |
commit | 3332b29f01993d22195f417d39ae4074d53fb55c (patch) | |
tree | f573fb1e8113fd3359d97e99b7d9cdebef4265af /ARMeilleure | |
parent | 47ba81c661c7ccd8bf2c972b73ad444cbf0d9744 (diff) |
CPU: Implement VFMA (Vector) (#1762)
* Implement VFMA.F64
* Simplify switch
* Simplify FMA Instructions into their own IntrinsicType.
* Remove whitespace
* Fix indentation
* Change tests for Vfnms -- disable inf / nan
* Move args up, not description ;)
* Implementation Complete.
All Tests Pass (Slow / Fast Path)
* Move location of function in assembler + test updates.
* Shift params upwards
* Remove unused function
* Update PTC version.
* Add comments / re-oreder opcode table.
* Remove whitespace
* Fix nit
* Fix nit.
* Fix whitespace
* Wrong opcode was used by a bad merge.
* Addressed rip's comments.
Diffstat (limited to 'ARMeilleure')
-rw-r--r-- | ARMeilleure/CodeGen/X86/Assembler.cs | 8 | ||||
-rw-r--r-- | ARMeilleure/Decoders/OpCodeTable.cs | 1 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs | 17 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdHelper32.cs | 12 |
4 files changed, 34 insertions, 4 deletions
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index b242a171..7f19c3c4 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -273,10 +273,10 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); - Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66)); - Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); - Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66)); - Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vfmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66)); diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 3d3b26f9..5cf83476 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -820,6 +820,7 @@ namespace ARMeilleure.Decoders SetA32("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor, InstEmit32.Veor_I, OpCode32SimdBinary.Create); SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, OpCode32SimdExt.Create); SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create); + SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create); SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index d72df97c..40289520 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -252,6 +252,23 @@ namespace ARMeilleure.Instructions } } + public static void Vfma_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseFma) + { + // Vectors contain elements that are 32-bits in length always. The only thing that will change is the number of elements in a vector. + // The 64-bit variant will never be used. + EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps, Intrinsic.X86Vfmadd231pd); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + public static void Vfma_S(ArmEmitterContext context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index 75aa7220..2d5d4ba9 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -820,6 +820,18 @@ namespace ARMeilleure.Instructions }); } + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + EmitVectorTernaryOpSimd32(context, (d, n, m) => + { + return context.AddIntrinsic(inst, d, n, m); + }); + } + public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc) { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; |