aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs185
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdArithmetic.cs2
-rw-r--r--ARMeilleure/Optimizations.cs46
-rw-r--r--ARMeilleure/Translation/PTC/Ptc.cs37
4 files changed, 238 insertions, 32 deletions
diff --git a/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs b/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
new file mode 100644
index 00000000..99ff299e
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
@@ -0,0 +1,185 @@
+using System;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Versioning;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static partial class HardwareCapabilities
+ {
+        /// <summary>
+        /// Fills in the feature flag properties once, when the type is first used.
+        /// If <see cref="ArmBase.Arm64.IsSupported"/> is false nothing is queried and every flag stays zero.
+        /// </summary>
+        static HardwareCapabilities()
+        {
+            if (!ArmBase.Arm64.IsSupported)
+            {
+                return;
+            }
+
+            if (OperatingSystem.IsLinux())
+            {
+                // Linux: each getauxval word is stored as-is, reinterpreted as a flags enum.
+                ulong hwCap = getauxval(AT_HWCAP);
+                LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)hwCap;
+
+                ulong hwCap2 = getauxval(AT_HWCAP2);
+                LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)hwCap2;
+            }
+
+            if (OperatingSystem.IsMacOS())
+            {
+                // macOS: one sysctl query per feature; entry N of the name table drives bit N.
+                MacOsFeatureFlags detected = 0;
+
+                for (int bit = 0; bit < _sysctlNames.Length; bit++)
+                {
+                    if (!CheckSysctlName(_sysctlNames[bit]))
+                    {
+                        continue;
+                    }
+
+                    detected |= (MacOsFeatureFlags)(1 << bit);
+                }
+
+                MacOsFeatureInfo = detected;
+            }
+        }
+
+#region Linux
+
+ // Entry types passed to getauxval; the static constructor queries both.
+ // NOTE(review): 16 and 26 are expected to match AT_HWCAP / AT_HWCAP2 in the Linux auxiliary vector headers.
+ private const ulong AT_HWCAP = 16;
+ private const ulong AT_HWCAP2 = 26;
+
+ // Native getauxval imported from libc; takes an entry type and returns the word stored for it.
+ [LibraryImport("libc", SetLastError = true)]
+ private static partial ulong getauxval(ulong type);
+
+        /// <summary>
+        /// Bit flags used to interpret the word returned by getauxval(AT_HWCAP).
+        /// Every member is a single bit, written as an unsigned 64-bit shift.
+        /// </summary>
+        [Flags]
+        public enum LinuxFeatureFlagsHwCap : ulong
+        {
+            Fp       = 1UL << 0,
+            Asimd    = 1UL << 1,
+            Evtstrm  = 1UL << 2,
+            Aes      = 1UL << 3,
+            Pmull    = 1UL << 4,
+            Sha1     = 1UL << 5,
+            Sha2     = 1UL << 6,
+            Crc32    = 1UL << 7,
+            Atomics  = 1UL << 8,
+            FpHp     = 1UL << 9,
+            AsimdHp  = 1UL << 10,
+            CpuId    = 1UL << 11,
+            AsimdRdm = 1UL << 12,
+            Jscvt    = 1UL << 13,
+            Fcma     = 1UL << 14,
+            Lrcpc    = 1UL << 15,
+            DcpOp    = 1UL << 16,
+            Sha3     = 1UL << 17,
+            Sm3      = 1UL << 18,
+            Sm4      = 1UL << 19,
+            AsimdDp  = 1UL << 20,
+            Sha512   = 1UL << 21,
+            Sve      = 1UL << 22,
+            AsimdFhm = 1UL << 23,
+            Dit      = 1UL << 24,
+            Uscat    = 1UL << 25,
+            Ilrcpc   = 1UL << 26,
+            FlagM    = 1UL << 27,
+            Ssbs     = 1UL << 28,
+            Sb       = 1UL << 29,
+            Paca     = 1UL << 30,
+            Pacg     = 1UL << 31
+        }
+
+        /// <summary>
+        /// Bit flags used to interpret the word returned by getauxval(AT_HWCAP2).
+        /// Every member is a single bit, written as an unsigned 64-bit shift.
+        /// </summary>
+        [Flags]
+        public enum LinuxFeatureFlagsHwCap2 : ulong
+        {
+            Dcpodp     = 1UL << 0,
+            Sve2       = 1UL << 1,
+            SveAes     = 1UL << 2,
+            SvePmull   = 1UL << 3,
+            SveBitperm = 1UL << 4,
+            SveSha3    = 1UL << 5,
+            SveSm4     = 1UL << 6,
+            FlagM2     = 1UL << 7,
+            Frint      = 1UL << 8,
+            SveI8mm    = 1UL << 9,
+            SveF32mm   = 1UL << 10,
+            SveF64mm   = 1UL << 11,
+            SveBf16    = 1UL << 12,
+            I8mm       = 1UL << 13,
+            Bf16       = 1UL << 14,
+            Dgh        = 1UL << 15,
+            Rng        = 1UL << 16,
+            Bti        = 1UL << 17,
+            Mte        = 1UL << 18,
+            Ecv        = 1UL << 19,
+            Afp        = 1UL << 20,
+            Rpres      = 1UL << 21,
+            Mte3       = 1UL << 22,
+            Sme        = 1UL << 23,
+            Sme_i16i64 = 1UL << 24,
+            Sme_f64f64 = 1UL << 25,
+            Sme_i8i32  = 1UL << 26,
+            Sme_f16f32 = 1UL << 27,
+            Sme_b16f32 = 1UL << 28,
+            Sme_f32f32 = 1UL << 29,
+            Sme_fa64   = 1UL << 30,
+            Wfxt       = 1UL << 31,
+            Ebf16      = 1UL << 32,
+            Sve_Ebf16  = 1UL << 33,
+            Cssc       = 1UL << 34,
+            Rprfm      = 1UL << 35,
+            Sve2p1     = 1UL << 36
+        }
+
+ // Value of getauxval(AT_HWCAP); assigned by the static constructor only on Linux when ArmBase.Arm64.IsSupported is true, otherwise it stays 0.
+ public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0;
+ // Value of getauxval(AT_HWCAP2); assigned under the same conditions, otherwise it stays 0.
+ public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0;
+
+#endregion
+
+#region macOS
+
+ // Native sysctlbyname imported from libSystem.dylib. This declaration receives the value as a single int (oldValue)
+ // and passes the buffer size in and out through oldSize. The only caller here, CheckSysctlName, always passes
+ // IntPtr.Zero and 0 for newValue/newValueSize.
+ [LibraryImport("libSystem.dylib", SetLastError = true)]
+ private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize);
+
+        /// <summary>
+        /// Reads an integer sysctl entry by name and reports whether it is set.
+        /// </summary>
+        /// <param name="name">Name of the sysctl entry to read.</param>
+        /// <returns>
+        /// True only when the native call returns 0, the reported size is exactly that of an
+        /// <see cref="int"/>, and the value read is non-zero; false in every other case.
+        /// </returns>
+        [SupportedOSPlatform("macos")]
+        private static bool CheckSysctlName(string name)
+        {
+            ulong valueSize = sizeof(int);
+            int status = sysctlbyname(name, out int value, ref valueSize, IntPtr.Zero, 0);
+
+            bool readOk = status == 0 && valueSize == sizeof(int);
+
+            return readOk && value != 0;
+        }
+
+        // sysctl entries probed on macOS. The position of a name in this table is the bit it sets in
+        // MacOsFeatureFlags (the static constructor ORs in 1 << index), so the order here must stay
+        // in step with that enum. Marked readonly because the table is never reassigned.
+        private static readonly string[] _sysctlNames = new string[]
+        {
+            "hw.optional.floatingpoint",
+            "hw.optional.AdvSIMD",
+            "hw.optional.arm.FEAT_FP16",
+            "hw.optional.arm.FEAT_AES",
+            "hw.optional.arm.FEAT_PMULL",
+            "hw.optional.arm.FEAT_LSE",
+            "hw.optional.armv8_crc32",
+            "hw.optional.arm.FEAT_SHA1",
+            "hw.optional.arm.FEAT_SHA256"
+        };
+
+ // Features detected on macOS. Bit N is set by the static constructor when entry N of _sysctlNames
+ // is reported as set, so the member order must match that table.
+ [Flags]
+ public enum MacOsFeatureFlags
+ {
+ Fp = 1 << 0,
+ AdvSimd = 1 << 1,
+ Fp16 = 1 << 2,
+ Aes = 1 << 3,
+ Pmull = 1 << 4,
+ Lse = 1 << 5,
+ Crc32 = 1 << 6,
+ Sha1 = 1 << 7,
+ Sha256 = 1 << 8
+ }
+
+ // Features found through the sysctl probes; filled in by the static constructor only on macOS when ArmBase.Arm64.IsSupported is true, otherwise it stays 0.
+ public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0;
+
+#endregion
+
+        // Each property is true when the matching bit is set in either the Linux HWCAP word or the
+        // macOS feature mask. Whichever source was not populated is zero and contributes nothing.
+        public static bool SupportsAdvSimd =>
+            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Asimd) != 0 ||
+            (MacOsFeatureInfo & MacOsFeatureFlags.AdvSimd) != 0;
+
+        public static bool SupportsAes =>
+            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Aes) != 0 ||
+            (MacOsFeatureInfo & MacOsFeatureFlags.Aes) != 0;
+
+        public static bool SupportsPmull =>
+            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Pmull) != 0 ||
+            (MacOsFeatureInfo & MacOsFeatureFlags.Pmull) != 0;
+
+        public static bool SupportsLse =>
+            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Atomics) != 0 ||
+            (MacOsFeatureInfo & MacOsFeatureFlags.Lse) != 0;
+
+        public static bool SupportsCrc32 =>
+            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Crc32) != 0 ||
+            (MacOsFeatureInfo & MacOsFeatureFlags.Crc32) != 0;
+
+        public static bool SupportsSha1 =>
+            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Sha1) != 0 ||
+            (MacOsFeatureInfo & MacOsFeatureFlags.Sha1) != 0;
+
+        public static bool SupportsSha256 =>
+            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Sha2) != 0 ||
+            (MacOsFeatureInfo & MacOsFeatureFlags.Sha256) != 0;
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index 3e65db23..d0bb68e4 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -2556,7 +2556,7 @@ namespace ARMeilleure.Instructions
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
- if (Optimizations.UseAdvSimd && false) // Not supported by all Arm CPUs.
+ if (Optimizations.UseArm64Pmull)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
}
diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs
index 0810d96c..9044314f 100644
--- a/ARMeilleure/Optimizations.cs
+++ b/ARMeilleure/Optimizations.cs
@@ -1,8 +1,10 @@
-using ARMeilleure.CodeGen.X86;
using System.Runtime.Intrinsics.Arm;
namespace ARMeilleure
{
+ using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
+ using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
+
public static class Optimizations
{
public static bool FastFP { get; set; } = true;
@@ -10,7 +12,8 @@ namespace ARMeilleure
public static bool AllowLcqInFunctionTable { get; set; } = true;
public static bool UseUnmanagedDispatchLoop { get; set; } = true;
- public static bool UseAdvSimdIfAvailable { get; set; } = true;
+ public static bool UseAdvSimdIfAvailable { get; set; } = true;
+ public static bool UseArm64PmullIfAvailable { get; set; } = true;
public static bool UseSseIfAvailable { get; set; } = true;
public static bool UseSse2IfAvailable { get; set; } = true;
@@ -29,25 +32,26 @@ namespace ARMeilleure
public static bool ForceLegacySse
{
- get => HardwareCapabilities.ForceLegacySse;
- set => HardwareCapabilities.ForceLegacySse = value;
+ get => X86HardwareCapabilities.ForceLegacySse;
+ set => X86HardwareCapabilities.ForceLegacySse = value;
}
- internal static bool UseAdvSimd => UseAdvSimdIfAvailable && AdvSimd.IsSupported;
-
- internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse;
- internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2;
- internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3;
- internal static bool UseSsse3 => UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3;
- internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
- internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
- internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
- internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
- internal static bool UseF16c => UseF16cIfAvailable && HardwareCapabilities.SupportsF16c;
- internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma;
- internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
- internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
- internal static bool UseSha => UseShaIfAvailable && HardwareCapabilities.SupportsSha;
- internal static bool UseGfni => UseGfniIfAvailable && HardwareCapabilities.SupportsGfni;
+ internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
+ internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
+
+ internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;
+ internal static bool UseSse2 => UseSse2IfAvailable && X86HardwareCapabilities.SupportsSse2;
+ internal static bool UseSse3 => UseSse3IfAvailable && X86HardwareCapabilities.SupportsSse3;
+ internal static bool UseSsse3 => UseSsse3IfAvailable && X86HardwareCapabilities.SupportsSsse3;
+ internal static bool UseSse41 => UseSse41IfAvailable && X86HardwareCapabilities.SupportsSse41;
+ internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42;
+ internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt;
+ internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse;
+ internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c;
+ internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma;
+ internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni;
+ internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq;
+ internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha;
+ internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni;
}
-} \ No newline at end of file
+}
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index 6f57e188..a59bc588 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -1,7 +1,6 @@
using ARMeilleure.CodeGen;
using ARMeilleure.CodeGen.Linking;
using ARMeilleure.CodeGen.Unwinding;
-using ARMeilleure.CodeGen.X86;
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Common;
@@ -22,12 +21,15 @@ using static ARMeilleure.Translation.PTC.PtcFormatter;
namespace ARMeilleure.Translation.PTC
{
+ using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
+ using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
+
class Ptc : IPtcLoadState
{
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
- private const uint InternalVersion = 4114; //! To be incremented manually for each change to the ARMeilleure project.
+ private const uint InternalVersion = 4264; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";
@@ -952,11 +954,26 @@ namespace ARMeilleure.Translation.PTC
private static FeatureInfo GetFeatureInfo()
{
- return new FeatureInfo(
- (uint)HardwareCapabilities.FeatureInfo1Ecx,
- (uint)HardwareCapabilities.FeatureInfo1Edx,
- (uint)HardwareCapabilities.FeatureInfo7Ebx,
- (uint)HardwareCapabilities.FeatureInfo7Ecx);
+ // Builds the FeatureInfo from the capabilities class matching the architecture of the running process.
+ if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+ {
+ // Arm64: Linux HWCAP, Linux HWCAP2 and macOS flags; the fourth word is always 0.
+ return new FeatureInfo(
+ (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap,
+ (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2,
+ (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo,
+ 0);
+ }
+ else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
+ {
+ // X64: the same four x86 feature words as before this change, now widened to ulong.
+ return new FeatureInfo(
+ (ulong)X86HardwareCapabilities.FeatureInfo1Ecx,
+ (ulong)X86HardwareCapabilities.FeatureInfo1Edx,
+ (ulong)X86HardwareCapabilities.FeatureInfo7Ebx,
+ (ulong)X86HardwareCapabilities.FeatureInfo7Ecx);
+ }
+ else
+ {
+ // Any other architecture: all four words are 0.
+ return new FeatureInfo(0, 0, 0, 0);
+ }
}
private byte GetMemoryManagerMode()
@@ -976,7 +993,7 @@ namespace ARMeilleure.Translation.PTC
return osPlatform;
}
- [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 58*/)]
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 74*/)]
private struct OuterHeader
{
public ulong Magic;
@@ -1007,8 +1024,8 @@ namespace ARMeilleure.Translation.PTC
}
}
- [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 16*/)]
- private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2, uint FeatureInfo3);
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)]
+ private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3);
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
private struct InnerHeader