aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDomenico V <35856442+LDj3SNuD@users.noreply.github.com>2023-07-31 01:57:37 +0200
committerGitHub <noreply@github.com>2023-07-30 20:57:37 -0300
commit2be8b6ea4527239fff1b2fdf9dc7ce1346b173e6 (patch)
tree31547834bced733e5c06b29d21aabf6ca7889e24
parentf95b7c58779f01d9077996da67953d8d9acd058c (diff)
CPU (A64): Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow Paths; with Tests. (#5502)1.1.971
* Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow Paths; with Tests. * Ptc.InternalVersion = 5502
-rw-r--r--src/ARMeilleure/Decoders/OpCodeTable.cs2
-rw-r--r--src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs50
-rw-r--r--src/ARMeilleure/Instructions/InstName.cs2
-rw-r--r--src/ARMeilleure/Translation/PTC/Ptc.cs2
-rw-r--r--src/Ryujinx.Tests/Cpu/CpuTestSimd.cs4
5 files changed, 59 insertions, 1 deletions
diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs
index 5cfd0bb8..9e13bd9b 100644
--- a/src/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/src/ARMeilleure/Decoders/OpCodeTable.cs
@@ -330,6 +330,7 @@ namespace ARMeilleure.Decoders
SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create);
SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create);
SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create);
+ SetA64("011111100x110000111110xxxxxxxxxx", InstName.Fmaxp_S, InstEmit.Fmaxp_S, OpCodeSimd.Create);
SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create);
SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create);
SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create);
@@ -339,6 +340,7 @@ namespace ARMeilleure.Decoders
SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create);
SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create);
SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create);
+ SetA64("011111101x110000111110xxxxxxxxxx", InstName.Fminp_S, InstEmit.Fminp_S, OpCodeSimd.Create);
SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create);
SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create);
SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create);
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 7b308fa9..543aab02 100644
--- a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -883,6 +883,31 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Fmaxp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
public static void Fmaxp_V(ArmEmitterContext context)
{
if (Optimizations.UseAdvSimd)
@@ -1081,6 +1106,31 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Fminp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
public static void Fminp_V(ArmEmitterContext context)
{
if (Optimizations.UseAdvSimd)
diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs
index fd71d92e..32ae38da 100644
--- a/src/ARMeilleure/Instructions/InstName.cs
+++ b/src/ARMeilleure/Instructions/InstName.cs
@@ -228,6 +228,7 @@ namespace ARMeilleure.Instructions
Fmaxnmp_S,
Fmaxnmp_V,
Fmaxnmv_V,
+ Fmaxp_S,
Fmaxp_V,
Fmaxv_V,
Fmin_S,
@@ -237,6 +238,7 @@ namespace ARMeilleure.Instructions
Fminnmp_S,
Fminnmp_V,
Fminnmv_V,
+ Fminp_S,
Fminp_V,
Fminv_V,
Fmla_Se,
diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs
index 14d4e471..ce653383 100644
--- a/src/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/src/ARMeilleure/Translation/PTC/Ptc.cs
@@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
- private const uint InternalVersion = 5343; //! To be incremented manually for each change to the ARMeilleure project.
+ private const uint InternalVersion = 5502; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";
diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index 4c568a8f..eb763618 100644
--- a/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -764,7 +764,9 @@ namespace Ryujinx.Tests.Cpu
{
0x7E30D820u, // FADDP S0, V1.2S
0x7E30C820u, // FMAXNMP S0, V1.2S
+ 0x7E30F820u, // FMAXP S0, V1.2S
0x7EB0C820u, // FMINNMP S0, V1.2S
+ 0x7EB0F820u, // FMINP S0, V1.2S
};
}
@@ -774,7 +776,9 @@ namespace Ryujinx.Tests.Cpu
{
0x7E70D820u, // FADDP D0, V1.2D
0x7E70C820u, // FMAXNMP D0, V1.2D
+ 0x7E70F820u, // FMAXP D0, V1.2D
0x7EF0C820u, // FMINNMP D0, V1.2D
+ 0x7EF0F820u, // FMINP D0, V1.2D
};
}