path: root/ARMeilleure
author    sharmander <saldabain.dev@gmail.com>  2020-12-14 22:01:52 -0500
committer GitHub <noreply@github.com>           2020-12-15 00:01:52 -0300
commit    3332b29f01993d22195f417d39ae4074d53fb55c (patch)
tree      f573fb1e8113fd3359d97e99b7d9cdebef4265af /ARMeilleure
parent    47ba81c661c7ccd8bf2c972b73ad444cbf0d9744 (diff)
CPU: Implement VFMA (Vector) (#1762)
* Implement VFMA.F64
* Simplify switch
* Simplify FMA Instructions into their own IntrinsicType.
* Remove whitespace
* Fix indentation
* Change tests for Vfnms -- disable inf / nan
* Move args up, not description ;)
* Implementation Complete. All Tests Pass (Slow / Fast Path)
* Move location of function in assembler + test updates.
* Shift params upwards
* Remove unused function
* Update PTC version.
* Add comments / re-order opcode table.
* Remove whitespace
* Fix nit
* Fix nit.
* Fix whitespace
* Wrong opcode was used by a bad merge.
* Addressed rip's comments.
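For context, VFMA (vector) is ARM's fused multiply-accumulate: each destination element is updated with n[i] * m[i] + d[i] using a single rounding step. A minimal sketch of the per-element F32 semantics, illustrative only and not code from this commit (System.MathF.FusedMultiplyAdd stands in for the fused behaviour):

    // Illustrative sketch: per-element semantics of A32 VFMA.F32 (vector).
    // The destination register is also the accumulator: d[i] = n[i] * m[i] + d[i], rounded once.
    static void VfmaF32(float[] d, float[] n, float[] m)
    {
        for (int i = 0; i < d.Length; i++)
        {
            d[i] = System.MathF.FusedMultiplyAdd(n[i], m[i], d[i]);
        }
    }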
Diffstat (limited to 'ARMeilleure')
-rw-r--r--  ARMeilleure/CodeGen/X86/Assembler.cs                   8
-rw-r--r--  ARMeilleure/Decoders/OpCodeTable.cs                    1
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs  17
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdHelper32.cs      12
4 files changed, 34 insertions, 4 deletions
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
index b242a171..7f19c3c4 100644
--- a/ARMeilleure/CodeGen/X86/Assembler.cs
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -273,10 +273,10 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
- Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
- Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
- Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
- Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
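The corrected constants line up with the FMA3 opcode map: under the 0F 38 escape, 0xB8/0xB9 are VFMADD231PS/SS (with VEX.W selecting the PD/SD forms), while 0xBC/0xBD, which the table previously used, belong to VFNMADD231. A small illustrative listing of the relevant "231" opcode bytes as given by the Intel SDM, not ARMeilleure code:

    // Illustrative only: opcode byte (the low byte of the 0x000f38xx constants) for each FMA3 "231" form.
    var fma231OpcodeBytes = new System.Collections.Generic.Dictionary<string, byte>
    {
        ["Vfmadd231ps"]  = 0xb8, ["Vfmadd231ss"]  = 0xb9, // values this commit switches to
        ["Vfmsub231ps"]  = 0xba, ["Vfmsub231ss"]  = 0xbb, // unchanged entries in the table
        ["Vfnmadd231ps"] = 0xbc, ["Vfnmadd231ss"] = 0xbd, // what the old constants actually encoded
    };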
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 3d3b26f9..5cf83476 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -820,6 +820,7 @@ namespace ARMeilleure.Decoders
SetA32("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor, InstEmit32.Veor_I, OpCode32SimdBinary.Create);
SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, OpCode32SimdExt.Create);
SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create);
+ SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create);
SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create);
SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create);
SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create);
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index d72df97c..40289520 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -252,6 +252,23 @@ namespace ARMeilleure.Instructions
}
}
+        public static void Vfma_V(ArmEmitterContext context) // Fused.
+        {
+            if (Optimizations.FastFP && Optimizations.UseFma)
+            {
+                // Vectors contain elements that are 32-bits in length always. The only thing that will change is the number of elements in a vector.
+                // The 64-bit variant will never be used.
+                EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps, Intrinsic.X86Vfmadd231pd);
+            }
+            else
+            {
+                EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+                });
+            }
+        }
+
public static void Vfma_S(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseSse2)
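Vfma_V keeps the fused semantics on both paths: the fast path emits VFMADD231 when the host supports FMA, and the fallback calls SoftFloat32.FPMulAdd rather than emitting a separate multiply and add. A short illustrative snippet, not from this commit, of why that distinction is observable; with a single rounding step the low bits of the product survive:

    // Illustrative only: fused vs. separate multiply-add can give different results.
    float a = 1f + System.MathF.Pow(2f, -23f);              // 1 + ulp(1)
    float c = -(1f + System.MathF.Pow(2f, -22f));            // minus the rounded value of a * a
    float unfused = a * a + c;                                // 0.0f: the product rounds before the add
    float fused   = System.MathF.FusedMultiplyAdd(a, a, c);  // 2^-46: one rounding keeps the low bits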
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index 75aa7220..2d5d4ba9 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -820,6 +820,18 @@ namespace ARMeilleure.Instructions
});
}
+        public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+            EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+            {
+                return context.AddIntrinsic(inst, d, n, m);
+            });
+        }
+
public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
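The helper selects the 64-bit intrinsic when the encoding's size bit is set, though as the comment in Vfma_V notes, the A32 vector form only ever reaches the 32-bit path. The "231" form is used because its operand roles match VFMA's accumulate-into-destination behaviour; a one-line illustrative sketch, not part of the commit:

    // Illustrative only: with operands (d, n, m), VFMADD231 computes d = n * m + d,
    // which is exactly ARM's accumulate-in-place d += n * m.
    static float Fmadd231(float d, float n, float m) => System.MathF.FusedMultiplyAdd(n, m, d);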