-rw-r--r--  ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs          2
-rw-r--r--  ARMeilleure/CodeGen/X86/AssemblerTable.cs            2
-rw-r--r--  ARMeilleure/CodeGen/X86/CodeGenerator.cs             25
-rw-r--r--  ARMeilleure/CodeGen/X86/IntrinsicTable.cs            6
-rw-r--r--  ARMeilleure/CodeGen/X86/Mxcsr.cs                     15
-rw-r--r--  ARMeilleure/CodeGen/X86/PreAllocator.cs              8
-rw-r--r--  ARMeilleure/CodeGen/X86/X86Instruction.cs            2
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdArithmetic.cs   512
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdCvt32.cs        17
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdHelper.cs       105
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdHelper32.cs     2
-rw-r--r--  ARMeilleure/Instructions/InstEmitSystem.cs           4
-rw-r--r--  ARMeilleure/Instructions/InstEmitSystem32.cs         2
-rw-r--r--  ARMeilleure/IntermediateRepresentation/Intrinsic.cs  8
-rw-r--r--  ARMeilleure/Translation/ArmEmitterContext.cs         15
-rw-r--r--  ARMeilleure/Translation/DispatcherFunction.cs        1
-rw-r--r--  ARMeilleure/Translation/PTC/Ptc.cs                   2
-rw-r--r--  ARMeilleure/Translation/TranslatedFunction.cs        5
-rw-r--r--  ARMeilleure/Translation/Translator.cs                4
-rw-r--r--  ARMeilleure/Translation/TranslatorStubs.cs           71
-rw-r--r--  ARMeilleure/Translation/TranslatorTestMethods.cs     148
-rw-r--r--  Ryujinx.Tests/Cpu/EnvironmentTests.cs                91
22 files changed, 822 insertions(+), 225 deletions(-)
diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
index 53ef152e..a309d56d 100644
--- a/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
@@ -226,6 +226,8 @@ namespace ARMeilleure.CodeGen.Arm64
Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
+ Add(Intrinsic.Arm64MrsFpcr, new IntrinsicInfo(0xd53b4400u, IntrinsicType.GetRegister));
+ Add(Intrinsic.Arm64MsrFpcr, new IntrinsicInfo(0xd51b4400u, IntrinsicType.SetRegister));
Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
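Note: these two entries expose the AArch64 FPCR system register to the IR alongside the existing FPSR pair; the base words are the `mrs x<t>, fpcr` / `msr fpcr, x<t>` encodings with Rt = 0 as a placeholder for the backend to fill in. A minimal usage sketch, using only calls that appear later in this diff:

    // Read-modify-write the host FPCR from an emitter context.
    Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
    fpcr = context.BitwiseOr(fpcr, Const((int)FPCR.Fz)); // e.g. enable flush-to-zero
    context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);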
diff --git a/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/ARMeilleure/CodeGen/X86/AssemblerTable.cs
index b47b3ecd..e6a2ff07 100644
--- a/ARMeilleure/CodeGen/X86/AssemblerTable.cs
+++ b/ARMeilleure/CodeGen/X86/AssemblerTable.cs
@@ -268,11 +268,13 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
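Note: in the 231 operand form the destination doubles as the addend: vfmadd231 computes dst = dst + src1 * src2 and vfnmadd231 computes dst = dst - src1 * src2, each with a single rounding. The new packed-double entries are used like the scalar ones already in the table, e.g.:

    // d := d + n * m (packed double, one rounding, like ARM FMLA).
    Operand res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, m);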
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
index 8b5a3fc5..e7179b51 100644
--- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -249,10 +249,9 @@ namespace ARMeilleure.CodeGen.X86
case IntrinsicType.Mxcsr:
{
Operand offset = operation.GetSource(0);
- Operand bits = operation.GetSource(1);
- Debug.Assert(offset.Kind == OperandKind.Constant && bits.Kind == OperandKind.Constant);
- Debug.Assert(offset.Type == OperandType.I32 && bits.Type == OperandType.I32);
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+ Debug.Assert(offset.Type == OperandType.I32);
int offs = offset.AsInt32() + context.CallArgsRegionSize;
@@ -261,21 +260,23 @@ namespace ARMeilleure.CodeGen.X86
Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding);
- context.Assembler.Stmxcsr(memOp);
-
- if (operation.Intrinsic == Intrinsic.X86Mxcsrmb)
+ if (operation.Intrinsic == Intrinsic.X86Ldmxcsr)
{
- context.Assembler.Or(memOp, bits, OperandType.I32);
+ Operand bits = operation.GetSource(1);
+ Debug.Assert(bits.Type == OperandType.I32);
+
+ context.Assembler.Mov(memOp, bits, OperandType.I32);
+ context.Assembler.Ldmxcsr(memOp);
}
- else /* if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrub) */
+ else if (operation.Intrinsic == Intrinsic.X86Stmxcsr)
{
- Operand notBits = Const(~bits.AsInt32());
+ Operand dest = operation.Destination;
+ Debug.Assert(dest.Type == OperandType.I32);
- context.Assembler.And(memOp, notBits, OperandType.I32);
+ context.Assembler.Stmxcsr(memOp);
+ context.Assembler.Mov(dest, memOp, OperandType.I32);
}
- context.Assembler.Ldmxcsr(memOp);
-
break;
}
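Note: the Mxcsr intrinsic type now maps straight onto the two hardware instructions instead of the old mask/unmask pair. Both still bounce through a stack slot because stmxcsr and ldmxcsr only accept a memory operand; the emitted sequences are roughly (illustrative, not literal output):

    // X86Stmxcsr (read):            // X86Ldmxcsr (write):
    //   stmxcsr [rsp + offs]        //   mov     [rsp + offs], bits
    //   mov     dest, [rsp + offs]  //   ldmxcsr [rsp + offs]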
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index c788fa44..e3d94b7a 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -60,6 +60,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary));
Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary));
Add(Intrinsic.X86Insertps, new IntrinsicInfo(X86Instruction.Insertps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Ldmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary));
Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary));
Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary));
@@ -75,8 +76,6 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
- Add(Intrinsic.X86Mxcsrmb, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Mask bits.
- Add(Intrinsic.X86Mxcsrub, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Unmask bits.
Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
@@ -160,6 +159,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary));
Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary));
Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Stmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary));
Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary));
Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary));
@@ -170,11 +170,13 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231pd, new IntrinsicInfo(X86Instruction.Vfnmadd231pd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
diff --git a/ARMeilleure/CodeGen/X86/Mxcsr.cs b/ARMeilleure/CodeGen/X86/Mxcsr.cs
new file mode 100644
index 00000000..c61eac31
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/Mxcsr.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ [Flags]
+ enum Mxcsr
+ {
+ Ftz = 1 << 15, // Flush To Zero.
+ Rhi = 1 << 14, // Round Mode high bit.
+ Rlo = 1 << 13, // Round Mode low bit.
+ Um = 1 << 11, // Underflow Mask.
+ Dm = 1 << 8, // Denormal Mask.
+ Daz = 1 << 6 // Denormals Are Zero.
+ }
+}
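Note: the bit positions follow the hardware MXCSR layout: FTZ is bit 15, the two rounding-control bits occupy 14:13 (Rhi/Rlo here), the underflow and denormal exception masks sit at bits 11 and 8, and DAZ is bit 6. A sketch of composing a value with these flags, matching the conventions EnterArmFpMode uses later in this diff:

    // Flush-to-zero both ways, with the exceptions those modes raise masked,
    // and RC = 0b11 (round toward zero).
    int mxcsr = (int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm | Mxcsr.Rhi | Mxcsr.Rlo);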
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
index 72f56514..cb742d67 100644
--- a/ARMeilleure/CodeGen/X86/PreAllocator.cs
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -120,12 +120,18 @@ namespace ARMeilleure.CodeGen.X86
break;
case Instruction.Extended:
- if (node.Intrinsic == Intrinsic.X86Mxcsrmb || node.Intrinsic == Intrinsic.X86Mxcsrub)
+ if (node.Intrinsic == Intrinsic.X86Ldmxcsr)
{
int stackOffset = stackAlloc.Allocate(OperandType.I32);
node.SetSources(new Operand[] { Const(stackOffset), node.GetSource(0) });
}
+ else if (node.Intrinsic == Intrinsic.X86Stmxcsr)
+ {
+ int stackOffset = stackAlloc.Allocate(OperandType.I32);
+
+ node.SetSources(new Operand[] { Const(stackOffset) });
+ }
break;
}
}
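Note: after this pass the operand layout that the CodeGenerator change above reads back via GetSource(0) is (sketch):

    // X86Ldmxcsr: sources = { Const(stackOffset), bits }, no destination.
    // X86Stmxcsr: sources = { Const(stackOffset) }, destination = value read.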
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
index ecfc432d..9a85c516 100644
--- a/ARMeilleure/CodeGen/X86/X86Instruction.cs
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -208,11 +208,13 @@ namespace ARMeilleure.CodeGen.X86
Vblendvps,
Vcvtph2ps,
Vcvtps2ph,
+ Vfmadd231pd,
Vfmadd231ps,
Vfmadd231sd,
Vfmadd231ss,
Vfmsub231sd,
Vfmsub231ss,
+ Vfnmadd231pd,
Vfnmadd231ps,
Vfnmadd231sd,
Vfnmadd231ss,
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index d0bb68e4..7e7f26b1 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -615,14 +615,11 @@ namespace ARMeilleure.Instructions
{
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
- Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
+ Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
- return context.AddIntrinsic(addInst, op1, op2);
- }, scalar: false, op1, op2);
+ return context.AddIntrinsic(addInst, op1, op2);
}, scalar: false, op1, op2);
});
}
@@ -696,17 +693,33 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
+ Operand res;
+
if (op.Size == 0)
{
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);
+ }
context.Copy(d, context.VectorZeroUpper96(res));
}
else /* if (op.Size == 1) */
{
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);
+ }
context.Copy(d, context.VectorZeroUpper64(res));
}
@@ -730,10 +743,7 @@ namespace ARMeilleure.Instructions
{
EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
- }, scalar: true, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
}, scalar: true);
}
else
@@ -755,10 +765,7 @@ namespace ARMeilleure.Instructions
{
EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
- }, scalar: false, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
}, scalar: false);
}
else
@@ -886,10 +893,7 @@ namespace ARMeilleure.Instructions
{
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
- }, scalar: false, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
}, scalar: false, op1, op2);
});
}
@@ -914,10 +918,7 @@ namespace ARMeilleure.Instructions
{
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
- }, scalar: false, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
}, scalar: false, op1, op2);
});
}
@@ -940,10 +941,7 @@ namespace ARMeilleure.Instructions
{
EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
- }, scalar: true, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
}, scalar: true);
}
else
@@ -965,10 +963,7 @@ namespace ARMeilleure.Instructions
{
EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
- }, scalar: false, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
}, scalar: false);
}
else
@@ -1096,10 +1091,7 @@ namespace ARMeilleure.Instructions
{
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
- }, scalar: false, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
}, scalar: false, op1, op2);
});
}
@@ -1124,10 +1116,7 @@ namespace ARMeilleure.Instructions
{
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
- }, scalar: false, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
}, scalar: false, op1, op2);
});
}
@@ -1146,6 +1135,37 @@ namespace ARMeilleure.Instructions
{
InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe);
}
+ else if (Optimizations.UseFma)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
else
{
EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
@@ -1171,11 +1191,19 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
+ Operand res;
+
if (sizeF == 0)
{
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
-
- res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ }
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -1186,9 +1214,15 @@ namespace ARMeilleure.Instructions
}
else /* if (sizeF == 1) */
{
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
-
- res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ }
context.Copy(d, res);
}
@@ -1224,8 +1258,15 @@ namespace ARMeilleure.Instructions
Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
- res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
- res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ }
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -1240,8 +1281,15 @@ namespace ARMeilleure.Instructions
Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
- res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
- res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ }
context.Copy(d, res);
}
@@ -1261,6 +1309,37 @@ namespace ARMeilleure.Instructions
{
InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe);
}
+ else if (Optimizations.UseFma)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
else
{
EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
@@ -1286,11 +1365,19 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
+ Operand res;
+
if (sizeF == 0)
{
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
-
- res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ }
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -1301,9 +1388,15 @@ namespace ARMeilleure.Instructions
}
else /* if (sizeF == 1) */
{
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
-
- res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ }
context.Copy(d, res);
}
@@ -1339,8 +1432,15 @@ namespace ARMeilleure.Instructions
Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
- res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
- res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ }
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -1355,8 +1455,15 @@ namespace ARMeilleure.Instructions
Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
- res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
- res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ }
context.Copy(d, res);
}
@@ -1385,17 +1492,33 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
+ Operand res;
+
if (op.Size == 0)
{
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);
+ }
context.Copy(d, context.VectorZeroUpper96(res));
}
else /* if (op.Size == 1) */
{
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);
+ }
context.Copy(d, context.VectorZeroUpper64(res));
}
@@ -1669,25 +1792,39 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
+ Operand res;
+
if (op.Size == 0)
{
- Operand mask = X86GetScalar(context, -0f);
-
- Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231ss, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res);
+ }
context.Copy(d, context.VectorZeroUpper96(res));
}
else /* if (op.Size == 1) */
{
- Operand mask = X86GetScalar(context, -0d);
-
- Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231sd, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res);
+ }
context.Copy(d, context.VectorZeroUpper64(res));
}
@@ -1716,25 +1853,39 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
+ Operand res;
+
if (op.Size == 0)
{
- Operand mask = X86GetScalar(context, -0f);
-
- Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmsub231ss, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res);
+ }
context.Copy(d, context.VectorZeroUpper96(res));
}
else /* if (op.Size == 1) */
{
- Operand mask = X86GetScalar(context, -0d);
-
- Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmsub231sd, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
- res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res);
+ }
context.Copy(d, context.VectorZeroUpper64(res));
}
@@ -1830,13 +1981,22 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
+ Operand res;
+
if (sizeF == 0)
{
Operand mask = X86GetScalar(context, 2f);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);
+ }
- res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);
res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
@@ -1845,9 +2005,16 @@ namespace ARMeilleure.Instructions
{
Operand mask = X86GetScalar(context, 2d);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);
+ }
- res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);
res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
@@ -1877,14 +2044,23 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
+ Operand res;
+
if (sizeF == 0)
{
Operand mask = X86GetAllElements(context, 2f);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
- res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);
+ }
- res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
if (op.RegisterSize == RegisterSize.Simd64)
{
@@ -1897,10 +2073,17 @@ namespace ARMeilleure.Instructions
{
Operand mask = X86GetAllElements(context, 2d);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
- res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);
+ }
- res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
context.Copy(GetVec(op.Rd), res);
}
@@ -2113,20 +2296,32 @@ namespace ARMeilleure.Instructions
public static void Frintx_S(ArmEmitterContext context)
{
- // TODO Arm64: Fast path. Should we set host FPCR?
- EmitScalarUnaryOpF(context, (op1) =>
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintxS);
+ }
+ else
{
- return EmitRoundByRMode(context, op1);
- });
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
}
public static void Frintx_V(ArmEmitterContext context)
{
- // TODO Arm64: Fast path. Should we set host FPCR?
- EmitVectorUnaryOpF(context, (op1) =>
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintxV);
+ }
+ else
{
- return EmitRoundByRMode(context, op1);
- });
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
}
public static void Frintz_S(ArmEmitterContext context)
@@ -2237,16 +2432,25 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
+ Operand res;
+
if (sizeF == 0)
{
Operand maskHalf = X86GetScalar(context, 0.5f);
Operand maskThree = X86GetScalar(context, 3f);
Operand maskOneHalf = X86GetScalar(context, 1.5f);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
+ }
- res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
- res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res);
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
@@ -2257,10 +2461,17 @@ namespace ARMeilleure.Instructions
Operand maskThree = X86GetScalar(context, 3d);
Operand maskOneHalf = X86GetScalar(context, 1.5d);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
+ }
- res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
- res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res);
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
@@ -2290,15 +2501,24 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
+ Operand res;
+
if (sizeF == 0)
{
Operand maskHalf = X86GetAllElements(context, 0.5f);
Operand maskThree = X86GetAllElements(context, 3f);
Operand maskOneHalf = X86GetAllElements(context, 1.5f);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
+ }
- res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
@@ -2315,9 +2535,16 @@ namespace ARMeilleure.Instructions
Operand maskThree = X86GetAllElements(context, 3d);
Operand maskOneHalf = X86GetAllElements(context, 1.5d);
- Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
+ }
- res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
@@ -4728,53 +4955,6 @@ namespace ARMeilleure.Instructions
}
}
- public static Operand EmitSseOrAvxHandleFzModeOpF(
- ArmEmitterContext context,
- Func2I emit,
- bool scalar,
- Operand n = default,
- Operand m = default)
- {
- Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n;
- Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m;
-
- EmitSseOrAvxEnterFtzAndDazModesOpF(context, out Operand isTrue);
-
- Operand res = emit(nCopy, mCopy);
-
- EmitSseOrAvxExitFtzAndDazModesOpF(context, isTrue);
-
- if (n != default || m != default)
- {
- return res;
- }
-
- int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
-
- if (sizeF == 0)
- {
- if (scalar)
- {
- res = context.VectorZeroUpper96(res);
- }
- else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
- {
- res = context.VectorZeroUpper64(res);
- }
- }
- else /* if (sizeF == 1) */
- {
- if (scalar)
- {
- res = context.VectorZeroUpper64(res);
- }
- }
-
- context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
-
- return default;
- }
-
private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax)
{
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
@@ -4834,10 +5014,7 @@ namespace ARMeilleure.Instructions
Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
- }, scalar: scalar, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
}, scalar: scalar, nCopy, mCopy);
if (n != default || m != default)
@@ -4872,10 +5049,7 @@ namespace ARMeilleure.Instructions
Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
{
- return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
- {
- return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
- }, scalar: scalar, op1, op2);
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
}, scalar: scalar, nCopy, mCopy);
if (n != default || m != default)
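Two things are going on in this file. First, the EmitSseOrAvxHandleFzModeOpF wrappers come off the NaN-handling paths because host flush-to-zero now tracks guest FPCR persistently via the Enter/ExitArmFpMode helpers added in InstEmitSimdHelper.cs below, instead of being toggled around individual instructions. Second, the UseFma fast paths are about precision as well as speed: ARM FMLA/FMLS round once, like the fused x86 instructions, while the old mul+add pair rounds twice. A plain .NET sketch of the difference (hypothetical values):

    float a = 0.1f, n = 0.2f, m = 0.3f;
    float fused = MathF.FusedMultiplyAdd(n, m, a); // one rounding, like FMLA
    float split = n * m + a;                       // two roundings, like mulss+addss
    // fused and split can differ in the last bit of the mantissa.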
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
index 5fdc3b5a..33ae83df 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -356,9 +356,11 @@ namespace ARMeilleure.Instructions
? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
: typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));
+ context.ExitArmFpMode();
context.StoreToContext();
Operand res = context.Call(method, src);
context.LoadFromContext();
+ context.EnterArmFpMode();
InsertScalar16(context, op.Vd, op.T, res);
}
@@ -372,9 +374,11 @@ namespace ARMeilleure.Instructions
? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
: typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));
+ context.ExitArmFpMode();
context.StoreToContext();
Operand res = context.Call(method, src);
context.LoadFromContext();
+ context.EnterArmFpMode();
InsertScalar(context, op.Vd, res);
}
@@ -542,10 +546,17 @@ namespace ARMeilleure.Instructions
// VRINTX (floating-point).
public static void Vrintx_S(ArmEmitterContext context)
{
- EmitScalarUnaryOpF32(context, (op1) =>
+ if (Optimizations.UseAdvSimd)
{
- return EmitRoundByRMode(context, op1);
- });
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
}
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
index 0e7af794..c44c9b4d 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -1,3 +1,4 @@
+using ARMeilleure.CodeGen.X86;
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
@@ -158,6 +159,75 @@ namespace ARMeilleure.Instructions
};
#endregion
+ public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ Operand fzTrue = getFpFlag(FPState.FzFlag);
+ Operand r0True = getFpFlag(FPState.RMode0Flag);
+ Operand r1True = getFpFlag(FPState.RMode1Flag);
+
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
+
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0)));
+
+ // X86 round modes in order: nearest, negative, positive, zero
+ // ARM round modes in order: nearest, positive, negative, zero
+ // Read the bits backwards to correct this.
+
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0)));
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0)));
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ Operand fzTrue = getFpFlag(FPState.FzFlag);
+ Operand r0True = getFpFlag(FPState.RMode0Flag);
+ Operand r1True = getFpFlag(FPState.RMode1Flag);
+
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
+
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0)));
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0)));
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0)));
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ // TODO: Restore FPSR
+ }
+ }
+
+ public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ // Unset round mode (to nearest) and ftz.
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+
+ // Status flags would be stored here if they were used.
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ // Unset round mode (to nearest) and fz.
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ // TODO: Store FPSR
+ }
+ }
+
public static int GetImmShl(OpCodeSimdShImm op)
{
return op.Imm - (8 << op.Size);
@@ -465,9 +535,11 @@ namespace ARMeilleure.Instructions
? typeof(SoftFloat32).GetMethod(name)
: typeof(SoftFloat64).GetMethod(name);
+ context.ExitArmFpMode();
context.StoreToContext();
Operand res = context.Call(info, callArgs);
context.LoadFromContext();
+ context.EnterArmFpMode();
return res;
}
@@ -1358,39 +1430,6 @@ namespace ARMeilleure.Instructions
}
}
- [Flags]
- public enum Mxcsr
- {
- Ftz = 1 << 15, // Flush To Zero.
- Um = 1 << 11, // Underflow Mask.
- Dm = 1 << 8, // Denormal Mask.
- Daz = 1 << 6 // Denormals Are Zero.
- }
-
- public static void EmitSseOrAvxEnterFtzAndDazModesOpF(ArmEmitterContext context, out Operand isTrue)
- {
- isTrue = GetFpFlag(FPState.FzFlag);
-
- Operand lblTrue = Label();
- context.BranchIfFalse(lblTrue, isTrue);
-
- context.AddIntrinsicNoRet(Intrinsic.X86Mxcsrmb, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm | Mxcsr.Daz)));
-
- context.MarkLabel(lblTrue);
- }
-
- public static void EmitSseOrAvxExitFtzAndDazModesOpF(ArmEmitterContext context, Operand isTrue = default)
- {
- isTrue = isTrue == default ? GetFpFlag(FPState.FzFlag) : isTrue;
-
- Operand lblTrue = Label();
- context.BranchIfFalse(lblTrue, isTrue);
-
- context.AddIntrinsicNoRet(Intrinsic.X86Mxcsrub, Const((int)(Mxcsr.Ftz | Mxcsr.Daz)));
-
- context.MarkLabel(lblTrue);
- }
-
public enum CmpCondition
{
// Legacy Sse.
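The bit swap in EnterArmFpMode above is worth spelling out: both architectures agree on the nearest and toward-zero encodings but swap the two directed modes, so reversing the two bits converts ARM's RMode field into x86's RC field (values from the respective reference manuals):

    // ARM FPCR.RMode     x86 MXCSR.RC
    // 00 = RN (nearest)  00 = RN (nearest)
    // 01 = RP (+inf)     01 = RM (-inf)
    // 10 = RM (-inf)     10 = RP (+inf)
    // 11 = RZ (zero)     11 = RZ (zero)
    // Hence RMode bit 0 drives Mxcsr.Rhi (bit 14) and RMode bit 1 drives Mxcsr.Rlo (bit 13).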
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index 84b01d05..36d27d42 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -1197,9 +1197,11 @@ namespace ARMeilleure.Instructions
Array.Resize(ref callArgs, callArgs.Length + 1);
callArgs[callArgs.Length - 1] = Const(1);
+ context.ExitArmFpMode();
context.StoreToContext();
Operand res = context.Call(info, callArgs);
context.LoadFromContext();
+ context.EnterArmFpMode();
return res;
}
diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs
index 1345bbf1..f668b83b 100644
--- a/ARMeilleure/Instructions/InstEmitSystem.cs
+++ b/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -192,6 +192,8 @@ namespace ARMeilleure.Instructions
SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpcr, Const(flag)), Const(1)));
}
}
+
+ context.UpdateArmFpMode();
}
private static void EmitSetFpsr(ArmEmitterContext context)
@@ -210,6 +212,8 @@ namespace ARMeilleure.Instructions
SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpsr, Const(flag)), Const(1)));
}
}
+
+ context.UpdateArmFpMode();
}
}
}
diff --git a/ARMeilleure/Instructions/InstEmitSystem32.cs b/ARMeilleure/Instructions/InstEmitSystem32.cs
index e07db412..2f6cf19d 100644
--- a/ARMeilleure/Instructions/InstEmitSystem32.cs
+++ b/ARMeilleure/Instructions/InstEmitSystem32.cs
@@ -321,6 +321,8 @@ namespace ARMeilleure.Instructions
SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpscr, Const(flag)), Const(1)));
}
}
+
+ context.UpdateArmFpMode();
}
}
}
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
index b629345e..f5a776fa 100644
--- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
+++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -53,6 +53,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Haddpd,
X86Haddps,
X86Insertps,
+ X86Ldmxcsr,
X86Maxpd,
X86Maxps,
X86Maxsd,
@@ -68,8 +69,6 @@ namespace ARMeilleure.IntermediateRepresentation
X86Mulps,
X86Mulsd,
X86Mulss,
- X86Mxcsrmb,
- X86Mxcsrub,
X86Paddb,
X86Paddd,
X86Paddq,
@@ -153,6 +152,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Sqrtps,
X86Sqrtsd,
X86Sqrtss,
+ X86Stmxcsr,
X86Subpd,
X86Subps,
X86Subsd,
@@ -163,11 +163,13 @@ namespace ARMeilleure.IntermediateRepresentation
X86Unpcklps,
X86Vcvtph2ps,
X86Vcvtps2ph,
+ X86Vfmadd231pd,
X86Vfmadd231ps,
X86Vfmadd231sd,
X86Vfmadd231ss,
X86Vfmsub231sd,
X86Vfmsub231ss,
+ X86Vfnmadd231pd,
X86Vfnmadd231ps,
X86Vfnmadd231sd,
X86Vfnmadd231ss,
@@ -394,6 +396,8 @@ namespace ARMeilleure.IntermediateRepresentation
Arm64MlsVe,
Arm64MlsV,
Arm64MoviV,
+ Arm64MrsFpcr,
+ Arm64MsrFpcr,
Arm64MrsFpsr,
Arm64MsrFpsr,
Arm64MulVe,
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs
index 238f8508..565d2aad 100644
--- a/ARMeilleure/Translation/ArmEmitterContext.cs
+++ b/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -188,6 +188,21 @@ namespace ARMeilleure.Translation
}
}
+ public void EnterArmFpMode()
+ {
+ InstEmitSimdHelper.EnterArmFpMode(this, InstEmitHelper.GetFpFlag);
+ }
+
+ public void UpdateArmFpMode()
+ {
+ EnterArmFpMode();
+ }
+
+ public void ExitArmFpMode()
+ {
+ InstEmitSimdHelper.ExitArmFpMode(this, (flag, value) => InstEmitHelper.SetFpFlag(this, flag, value));
+ }
+
public Operand TryGetComparisonResult(Condition condition)
{
if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)
diff --git a/ARMeilleure/Translation/DispatcherFunction.cs b/ARMeilleure/Translation/DispatcherFunction.cs
index e3ea21f6..7d5a3388 100644
--- a/ARMeilleure/Translation/DispatcherFunction.cs
+++ b/ARMeilleure/Translation/DispatcherFunction.cs
@@ -3,4 +3,5 @@
namespace ARMeilleure.Translation
{
delegate void DispatcherFunction(IntPtr nativeContext, ulong startAddress);
+ delegate ulong WrapperFunction(IntPtr nativeContext, ulong startAddress);
}
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index 17f68706..5970c4ff 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
- private const uint InternalVersion = 4485; //! To be incremented manually for each change to the ARMeilleure project.
+ private const uint InternalVersion = 4626; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";
diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs
index 71eec08a..f007883e 100644
--- a/ARMeilleure/Translation/TranslatedFunction.cs
+++ b/ARMeilleure/Translation/TranslatedFunction.cs
@@ -25,5 +25,10 @@ namespace ARMeilleure.Translation
{
return _func(context.NativeContextPtr);
}
+
+ public ulong Execute(WrapperFunction dispatcher, State.ExecutionContext context)
+ {
+ return dispatcher(context.NativeContextPtr, (ulong)FuncPointer);
+ }
}
}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
index 0c05b2b4..f349c5eb 100644
--- a/ARMeilleure/Translation/Translator.cs
+++ b/ARMeilleure/Translation/Translator.cs
@@ -183,7 +183,7 @@ namespace ARMeilleure.Translation
Statistics.StartTimer();
- ulong nextAddr = func.Execute(context);
+ ulong nextAddr = func.Execute(Stubs.ContextWrapper, context);
Statistics.StopTimer(address);
@@ -194,7 +194,7 @@ namespace ARMeilleure.Translation
{
TranslatedFunction func = Translate(address, context.ExecutionMode, highCq: false, singleStep: true);
- address = func.Execute(context);
+ address = func.Execute(Stubs.ContextWrapper, context);
EnqueueForDeletion(address, func);
diff --git a/ARMeilleure/Translation/TranslatorStubs.cs b/ARMeilleure/Translation/TranslatorStubs.cs
index 6ed84de8..69648df4 100644
--- a/ARMeilleure/Translation/TranslatorStubs.cs
+++ b/ARMeilleure/Translation/TranslatorStubs.cs
@@ -21,6 +21,7 @@ namespace ARMeilleure.Translation
private readonly Translator _translator;
private readonly Lazy<IntPtr> _dispatchStub;
private readonly Lazy<DispatcherFunction> _dispatchLoop;
+ private readonly Lazy<WrapperFunction> _contextWrapper;
/// <summary>
/// Gets the dispatch stub.
@@ -65,6 +66,20 @@ namespace ARMeilleure.Translation
}
/// <summary>
+ /// Gets the context wrapper function.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception>
+ public WrapperFunction ContextWrapper
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ return _contextWrapper.Value;
+ }
+ }
+
+ /// <summary>
/// Initializes a new instance of the <see cref="TranslatorStubs"/> class with the specified
/// <see cref="Translator"/> instance.
/// </summary>
@@ -77,6 +92,7 @@ namespace ARMeilleure.Translation
_translator = translator;
_dispatchStub = new(GenerateDispatchStub, isThreadSafe: true);
_dispatchLoop = new(GenerateDispatchLoop, isThreadSafe: true);
+ _contextWrapper = new(GenerateContextWrapper, isThreadSafe: true);
}
/// <summary>
@@ -203,6 +219,32 @@ namespace ARMeilleure.Translation
}
/// <summary>
+ /// Emits code that syncs FP state before executing guest code, or returns it to normal.
+ /// </summary>
+ /// <param name="context">Emitter context for the method</param>
+ /// <param name="nativeContext">Pointer to the native context</param>
+ /// <param name="enter">True if entering guest code, false otherwise</param>
+ private void EmitSyncFpContext(EmitterContext context, Operand nativeContext, bool enter)
+ {
+ if (enter)
+ {
+ InstEmitSimdHelper.EnterArmFpMode(context, (flag) =>
+ {
+ Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag))));
+ return context.Load(OperandType.I32, flagAddress);
+ });
+ }
+ else
+ {
+ InstEmitSimdHelper.ExitArmFpMode(context, (flag, value) =>
+ {
+ Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag))));
+ context.Store(flagAddress, value);
+ });
+ }
+ }
+
+ /// <summary>
/// Generates a <see cref="DispatchLoop"/> function.
/// </summary>
/// <returns><see cref="DispatchLoop"/> function</returns>
@@ -221,6 +263,8 @@ namespace ARMeilleure.Translation
Operand runningAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRunningOffset()));
Operand dispatchAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset()));
+ EmitSyncFpContext(context, nativeContext, true);
+
context.MarkLabel(beginLbl);
context.Store(dispatchAddress, guestAddress);
context.Copy(guestAddress, context.Call(Const((ulong)DispatchStub), OperandType.I64, nativeContext));
@@ -229,6 +273,9 @@ namespace ARMeilleure.Translation
context.Branch(beginLbl);
context.MarkLabel(endLbl);
+
+ EmitSyncFpContext(context, nativeContext, false);
+
context.Return();
var cfg = context.GetControlFlowGraph();
@@ -237,5 +284,29 @@ namespace ARMeilleure.Translation
return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>();
}
+
+ /// <summary>
+ /// Generates a <see cref="ContextWrapper"/> function.
+ /// </summary>
+ /// <returns><see cref="ContextWrapper"/> function</returns>
+ private WrapperFunction GenerateContextWrapper()
+ {
+ var context = new EmitterContext();
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+ Operand guestMethod = context.LoadArgument(OperandType.I64, 1);
+
+ EmitSyncFpContext(context, nativeContext, true);
+ Operand returnValue = context.Call(guestMethod, OperandType.I64, nativeContext);
+ EmitSyncFpContext(context, nativeContext, false);
+
+ context.Return(returnValue);
+
+ var cfg = context.GetControlFlowGraph();
+ var retType = OperandType.I64;
+ var argTypes = new[] { OperandType.I64, OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<WrapperFunction>();
+ }
}
}
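The wrapper gives host-to-guest transitions a single choke point for FP state. With the Translator.cs change above, every execution goes through it; end to end the call is roughly:

    // TranslatedFunction.Execute(WrapperFunction, ExecutionContext):
    ulong nextAddr = Stubs.ContextWrapper(context.NativeContextPtr, (ulong)FuncPointer);
    // ContextWrapper enters ARM FP mode, calls the guest function,
    // then restores host FP mode before returning the next guest address.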
diff --git a/ARMeilleure/Translation/TranslatorTestMethods.cs b/ARMeilleure/Translation/TranslatorTestMethods.cs
new file mode 100644
index 00000000..ab96019a
--- /dev/null
+++ b/ARMeilleure/Translation/TranslatorTestMethods.cs
@@ -0,0 +1,148 @@
+using ARMeilleure.CodeGen.X86;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Runtime.InteropServices;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+ public static class TranslatorTestMethods
+ {
+ public delegate int FpFlagsPInvokeTest(IntPtr managedMethod);
+
+ private static bool SetPlatformFtz(EmitterContext context, bool ftz)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ if (ftz)
+ {
+ mxcsr = context.BitwiseOr(mxcsr, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm)));
+ }
+ else
+ {
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)Mxcsr.Ftz));
+ }
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+
+ return true;
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ if (ftz)
+ {
+ fpcr = context.BitwiseOr(fpcr, Const((int)FPCR.Fz));
+ }
+ else
+ {
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)FPCR.Fz));
+ }
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ private static Operand FpBitsToInt(EmitterContext context, Operand fp)
+ {
+ Operand vec = context.VectorInsert(context.VectorZero(), fp, 0);
+ return context.VectorExtract(OperandType.I32, vec, 0);
+ }
+
+ public static FpFlagsPInvokeTest GenerateFpFlagsPInvokeTest()
+ {
+ EmitterContext context = new EmitterContext();
+
+ Operand methodAddress = context.Copy(context.LoadArgument(OperandType.I64, 0));
+
+ // Verify that default dotnet fp state does not flush to zero.
+ // This is required for SoftFloat to function.
+
+ // Denormal + zero != 0
+
+ Operand denormal = ConstF(BitConverter.Int32BitsToSingle(1)); // 1.40129846432e-45
+ Operand zeroF = ConstF(0f);
+ Operand zero = Const(0);
+
+ Operand result = context.Add(zeroF, denormal);
+
+ // Must not be zero.
+
+ Operand correct1Label = Label();
+
+ context.BranchIfFalse(correct1Label, context.ICompareEqual(FpBitsToInt(context, result), zero));
+
+ context.Return(Const(1));
+
+ context.MarkLabel(correct1Label);
+
+ // Set the flush-to-zero flag. If the backend does not support changing it, just report success (0).
+
+ if (!SetPlatformFtz(context, true))
+ {
+ context.Return(Const(0));
+ }
+
+ // Denormal + zero == 0
+
+ Operand resultFz = context.Add(zeroF, denormal);
+
+ // Must equal zero.
+
+ Operand correct2Label = Label();
+
+ context.BranchIfTrue(correct2Label, context.ICompareEqual(FpBitsToInt(context, resultFz), zero));
+
+ SetPlatformFtz(context, false);
+
+ context.Return(Const(2));
+
+ context.MarkLabel(correct2Label);
+
+ // Call a managed method. This method should not change Fz state.
+
+ context.Call(methodAddress, OperandType.None);
+
+ // Denormal + zero == 0
+
+ Operand resultFz2 = context.Add(zeroF, denormal);
+
+ // Must equal zero.
+
+ Operand correct3Label = Label();
+
+ context.BranchIfTrue(correct3Label, context.ICompareEqual(FpBitsToInt(context, resultFz2), zero));
+
+ SetPlatformFtz(context, false);
+
+ context.Return(Const(3));
+
+ context.MarkLabel(correct3Label);
+
+ // Success.
+
+ SetPlatformFtz(context, false);
+
+ context.Return(Const(0));
+
+ // Compile and return the function.
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+ return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<FpFlagsPInvokeTest>();
+ }
+ }
+}
diff --git a/Ryujinx.Tests/Cpu/EnvironmentTests.cs b/Ryujinx.Tests/Cpu/EnvironmentTests.cs
new file mode 100644
index 00000000..d374c08a
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/EnvironmentTests.cs
@@ -0,0 +1,91 @@
+using ARMeilleure.Translation;
+using NUnit.Framework;
+using Ryujinx.Cpu.Jit;
+using Ryujinx.Tests.Memory;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Tests.Cpu
+{
+ internal class EnvironmentTests
+ {
+ private static Translator _translator;
+
+ private void EnsureTranslator()
+ {
+ // Create a translator, as one is needed to register the signal handler and to emit methods.
+ _translator ??= new Translator(new JitMemoryAllocator(), new MockMemoryManager(), true);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)]
+ private float GetDenormal()
+ {
+ return BitConverter.Int32BitsToSingle(1);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)]
+ private float GetZero()
+ {
+ return BitConverter.Int32BitsToSingle(0);
+ }
+
+ /// <summary>
+ /// This test ensures that managed methods do not reset floating point control flags.
+ /// This is used to avoid changing control flags when running methods that don't require it, such as SVC calls, software memory...
+ /// </summary>
+ [Test]
+ public void FpFlagsPInvoke()
+ {
+ EnsureTranslator();
+
+ // Subnormal results are not flushed to zero by default.
+ // This addition must not be constant-folded, hence the helper methods that explicitly disallow inlining.
+ Assert.AreNotEqual(GetDenormal() + GetZero(), 0f);
+
+ bool methodCalled = false;
+ bool isFz = false;
+
+ var managedMethod = () =>
+ {
+ // Floating point math should not modify fp flags.
+ float test = 2f * 3.5f;
+
+ if (test < 4f)
+ {
+ throw new System.Exception("Sanity check.");
+ }
+
+ isFz = GetDenormal() + GetZero() == 0f;
+
+ try
+ {
+ if (test >= 4f)
+ {
+ throw new System.Exception("Always throws.");
+ }
+ }
+ catch
+ {
+ // Exception handling should not modify fp flags.
+
+ methodCalled = true;
+ }
+ };
+
+ var method = TranslatorTestMethods.GenerateFpFlagsPInvokeTest();
+
+ // This method sets flush-to-zero and then calls the managed method.
+ // Before and after setting the flags, it ensures subnormal addition works as expected.
+ // It returns a positive code identifying the failed check, and 0 on success (or if the platform cannot change FP flags).
+ int result = method(Marshal.GetFunctionPointerForDelegate(managedMethod));
+
+ // Subnormal results are not flushed to zero by default, a state that should have been restored on exiting the method.
+ Assert.AreNotEqual(GetDenormal() + GetZero(), 0f);
+
+ Assert.True(result == 0);
+ Assert.True(methodCalled);
+ Assert.True(isFz);
+ }
+ }
+}