aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormerry <git@mary.rs>2022-08-25 11:12:13 +0100
committerGitHub <noreply@github.com>2022-08-25 10:12:13 +0000
commitf5235fff29e797ed76022bbd51e4e64577c83457 (patch)
treef5d6be6efd9990cf74bf7272b62942a9438d9814
parenteba682b767a60db51ff624ae48a3ca0124634705 (diff)
ARMeilleure: Hardware accelerate SHA256 (#3585)1.1.230
* ARMeilleure/HardwareCapabilities: Add Sha * ARMeilleure/Intrinsic: Add X86Sha256Rnds2 * ARmeilleure: Hardware accelerate SHA256H/SHA256H2 * ARMeilleure/Intrinsic: Add X86Sha256Msg1, X86Sha256Msg2 * ARMeilleure/Intrinsic: Add X86Palignr * ARMeilleure: Hardware accelerate SHA256SU0, SHA256SU1 * PTC: Bump InternalVersion
-rw-r--r--ARMeilleure/CodeGen/X86/AssemblerTable.cs4
-rw-r--r--ARMeilleure/CodeGen/X86/HardwareCapabilities.cs57
-rw-r--r--ARMeilleure/CodeGen/X86/IntrinsicTable.cs4
-rw-r--r--ARMeilleure/CodeGen/X86/PreAllocator.cs10
-rw-r--r--ARMeilleure/CodeGen/X86/X86Instruction.cs4
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdHash.cs8
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdHash32.cs8
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdHashHelper.cs56
-rw-r--r--ARMeilleure/Instructions/SoftFallback.cs2
-rw-r--r--ARMeilleure/IntermediateRepresentation/Intrinsic.cs4
-rw-r--r--ARMeilleure/Optimizations.cs2
-rw-r--r--ARMeilleure/Translation/PTC/Ptc.cs16
12 files changed, 137 insertions, 38 deletions
diff --git a/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/ARMeilleure/CodeGen/X86/AssemblerTable.cs
index 3af42204..e40ffad4 100644
--- a/ARMeilleure/CodeGen/X86/AssemblerTable.cs
+++ b/ARMeilleure/CodeGen/X86/AssemblerTable.cs
@@ -157,6 +157,7 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
@@ -239,6 +240,9 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None));
+ Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None));
+ Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cb, InstructionFlags.None));
Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
index aa103e30..a29dd5be 100644
--- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -12,21 +12,28 @@ namespace ARMeilleure.CodeGen.X86
return;
}
- (_, _, int ecx, int edx) = X86Base.CpuId(0x00000001, 0x00000000);
+ (int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000);
- FeatureInfoEdx = (FeatureFlagsEdx)edx;
- FeatureInfoEcx = (FeatureFlagsEcx)ecx;
+ (_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000);
+ FeatureInfo1Edx = (FeatureFlags1Edx)edx1;
+ FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1;
+
+ if (maxNum >= 7)
+ {
+ (_, int ebx7, _, _) = X86Base.CpuId(0x00000007, 0x00000000);
+ FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
+ }
}
[Flags]
- public enum FeatureFlagsEdx
+ public enum FeatureFlags1Edx
{
Sse = 1 << 25,
Sse2 = 1 << 26
}
[Flags]
- public enum FeatureFlagsEcx
+ public enum FeatureFlags1Ecx
{
Sse3 = 1 << 0,
Pclmulqdq = 1 << 1,
@@ -40,21 +47,31 @@ namespace ARMeilleure.CodeGen.X86
F16c = 1 << 29
}
- public static FeatureFlagsEdx FeatureInfoEdx { get; }
- public static FeatureFlagsEcx FeatureInfoEcx { get; }
-
- public static bool SupportsSse => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse);
- public static bool SupportsSse2 => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse2);
- public static bool SupportsSse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse3);
- public static bool SupportsPclmulqdq => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Pclmulqdq);
- public static bool SupportsSsse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Ssse3);
- public static bool SupportsFma => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Fma);
- public static bool SupportsSse41 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse41);
- public static bool SupportsSse42 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse42);
- public static bool SupportsPopcnt => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Popcnt);
- public static bool SupportsAesni => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Aes);
- public static bool SupportsAvx => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Avx);
- public static bool SupportsF16c => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.F16c);
+ [Flags]
+ public enum FeatureFlags7Ebx
+ {
+ Avx2 = 1 << 5,
+ Sha = 1 << 29
+ }
+
+ public static FeatureFlags1Edx FeatureInfo1Edx { get; }
+ public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
+ public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
+
+ public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
+ public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
+ public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3);
+ public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq);
+ public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3);
+ public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma);
+ public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41);
+ public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
+ public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
+ public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
+ public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx);
+ public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
+ public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
+ public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
public static bool ForceLegacySse { get; set; }
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index 5deee349..ada86cfa 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -82,6 +82,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Palignr, new IntrinsicInfo(X86Instruction.Palignr, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
@@ -151,6 +152,9 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sha256Msg1, new IntrinsicInfo(X86Instruction.Sha256Msg1, IntrinsicType.Binary));
+ Add(Intrinsic.X86Sha256Msg2, new IntrinsicInfo(X86Instruction.Sha256Msg2, IntrinsicType.Binary));
+ Add(Intrinsic.X86Sha256Rnds2, new IntrinsicInfo(X86Instruction.Sha256Rnds2, IntrinsicType.Ternary));
Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
index 7d2d4df0..dd73a1dd 100644
--- a/ARMeilleure/CodeGen/X86/PreAllocator.cs
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -308,11 +308,13 @@ namespace ARMeilleure.CodeGen.X86
case Instruction.Extended:
{
+ bool isBlend = node.Intrinsic == Intrinsic.X86Blendvpd ||
+ node.Intrinsic == Intrinsic.X86Blendvps ||
+ node.Intrinsic == Intrinsic.X86Pblendvb;
+
// BLENDVPD, BLENDVPS, PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
- if ((node.Intrinsic == Intrinsic.X86Blendvpd ||
- node.Intrinsic == Intrinsic.X86Blendvps ||
- node.Intrinsic == Intrinsic.X86Pblendvb) &&
- !HardwareCapabilities.SupportsVexEncoding)
+ // SHA256RNDS2 always has an implied XMM0 as a last operand.
+ if ((isBlend && !HardwareCapabilities.SupportsVexEncoding) || node.Intrinsic == Intrinsic.X86Sha256Rnds2)
{
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
index ed5b50c5..37926a16 100644
--- a/ARMeilleure/CodeGen/X86/X86Instruction.cs
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -98,6 +98,7 @@ namespace ARMeilleure.CodeGen.X86
Paddd,
Paddq,
Paddw,
+ Palignr,
Pand,
Pandn,
Pavgb,
@@ -180,6 +181,9 @@ namespace ARMeilleure.CodeGen.X86
Rsqrtss,
Sar,
Setcc,
+ Sha256Msg1,
+ Sha256Msg2,
+ Sha256Rnds2,
Shl,
Shr,
Shufpd,
diff --git a/ARMeilleure/Instructions/InstEmitSimdHash.cs b/ARMeilleure/Instructions/InstEmitSimdHash.cs
index ed8b4afd..4fb048ee 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHash.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHash.cs
@@ -100,7 +100,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
- Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower)), d, n, m);
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
context.Copy(GetVec(op.Rd), res);
}
@@ -113,7 +113,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
- Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)), d, n, m);
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
context.Copy(GetVec(op.Rd), res);
}
@@ -125,7 +125,7 @@ namespace ARMeilleure.Instructions
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
- Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), d, n);
+ Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, n);
context.Copy(GetVec(op.Rd), res);
}
@@ -138,7 +138,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
- Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), d, n, m);
+ Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
context.Copy(GetVec(op.Rd), res);
}
diff --git a/ARMeilleure/Instructions/InstEmitSimdHash32.cs b/ARMeilleure/Instructions/InstEmitSimdHash32.cs
index e19d364d..51334608 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHash32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHash32.cs
@@ -17,7 +17,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
- Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower)), d, n, m);
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
context.Copy(GetVecA32(op.Qd), res);
}
@@ -30,7 +30,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
- Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)), d, n, m);
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
context.Copy(GetVecA32(op.Qd), res);
}
@@ -42,7 +42,7 @@ namespace ARMeilleure.Instructions
Operand d = GetVecA32(op.Qd);
Operand m = GetVecA32(op.Qm);
- Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), d, m);
+ Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, m);
context.Copy(GetVecA32(op.Qd), res);
}
@@ -55,7 +55,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
- Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), d, n, m);
+ Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
context.Copy(GetVecA32(op.Qd), res);
}
diff --git a/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
new file mode 100644
index 00000000..23e4948d
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
@@ -0,0 +1,56 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitSimdHashHelper
+ {
+ public static Operand EmitSha256h(ArmEmitterContext context, Operand x, Operand y, Operand w, bool part2)
+ {
+ if (Optimizations.UseSha)
+ {
+ Operand src1 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0xbb));
+ Operand src2 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0x11));
+ Operand w2 = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, w, w);
+
+ Operand round2 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src1, src2, w);
+ Operand round4 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src2, round2, w2);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, round4, round2, Const(part2 ? 0x11 : 0xbb));
+
+ return res;
+ }
+
+ String method = part2 ? nameof(SoftFallback.HashUpper) : nameof(SoftFallback.HashLower);
+ return context.Call(typeof(SoftFallback).GetMethod(method), x, y, w);
+ }
+
+ public static Operand EmitSha256su0(ArmEmitterContext context, Operand x, Operand y)
+ {
+ if (Optimizations.UseSha)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Sha256Msg1, x, y);
+ }
+
+ return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), x, y);
+ }
+
+ public static Operand EmitSha256su1(ArmEmitterContext context, Operand x, Operand y, Operand z)
+ {
+ if (Optimizations.UseSha && Optimizations.UseSsse3)
+ {
+ Operand extr = context.AddIntrinsic(Intrinsic.X86Palignr, z, y, Const(4));
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Paddd, extr, x);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Sha256Msg2, tmp, z);
+
+ return res;
+ }
+
+ return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), x, y, z);
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs
index d9e43021..f83afc16 100644
--- a/ARMeilleure/Instructions/SoftFallback.cs
+++ b/ARMeilleure/Instructions/SoftFallback.cs
@@ -1129,7 +1129,7 @@ namespace ARMeilleure.Instructions
return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
}
- public static V128 HashUpper(V128 hash_efgh, V128 hash_abcd, V128 wk)
+ public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk)
{
return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
}
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
index f5c5f3d7..71bfc3be 100644
--- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
+++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -71,6 +71,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Paddd,
X86Paddq,
X86Paddw,
+ X86Palignr,
X86Pand,
X86Pandn,
X86Pavgb,
@@ -140,6 +141,9 @@ namespace ARMeilleure.IntermediateRepresentation
X86Roundss,
X86Rsqrtps,
X86Rsqrtss,
+ X86Sha256Msg1,
+ X86Sha256Msg2,
+ X86Sha256Rnds2,
X86Shufpd,
X86Shufps,
X86Sqrtpd,
diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs
index 986b0c9e..19193971 100644
--- a/ARMeilleure/Optimizations.cs
+++ b/ARMeilleure/Optimizations.cs
@@ -21,6 +21,7 @@ namespace ARMeilleure
public static bool UseFmaIfAvailable { get; set; } = true;
public static bool UseAesniIfAvailable { get; set; } = true;
public static bool UsePclmulqdqIfAvailable { get; set; } = true;
+ public static bool UseShaIfAvailable { get; set; } = true;
public static bool ForceLegacySse
{
@@ -40,5 +41,6 @@ namespace ARMeilleure
internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma;
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
+ internal static bool UseSha => UseShaIfAvailable && HardwareCapabilities.SupportsSha;
}
} \ No newline at end of file
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index db8c74b3..8abb0f6d 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
- private const uint InternalVersion = 3439; //! To be incremented manually for each change to the ARMeilleure project.
+ private const uint InternalVersion = 3585; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";
@@ -946,9 +946,12 @@ namespace ARMeilleure.Translation.PTC
return BitConverter.IsLittleEndian;
}
- private static ulong GetFeatureInfo()
+ private static FeatureInfo GetFeatureInfo()
{
- return (ulong)HardwareCapabilities.FeatureInfoEdx << 32 | (uint)HardwareCapabilities.FeatureInfoEcx;
+ return new FeatureInfo(
+ (uint)HardwareCapabilities.FeatureInfo1Ecx,
+ (uint)HardwareCapabilities.FeatureInfo1Edx,
+ (uint)HardwareCapabilities.FeatureInfo7Ebx);
}
private static byte GetMemoryManagerMode()
@@ -968,7 +971,7 @@ namespace ARMeilleure.Translation.PTC
return osPlatform;
}
- [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 50*/)]
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 54*/)]
private struct OuterHeader
{
public ulong Magic;
@@ -976,7 +979,7 @@ namespace ARMeilleure.Translation.PTC
public uint CacheFileVersion;
public bool Endianness;
- public ulong FeatureInfo;
+ public FeatureInfo FeatureInfo;
public byte MemoryManagerMode;
public uint OSPlatform;
@@ -999,6 +1002,9 @@ namespace ARMeilleure.Translation.PTC
}
}
+ [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 12*/)]
+ private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2);
+
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
private struct InnerHeader
{