aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/Instructions/InstEmitHash.cs
diff options
context:
space:
mode:
author riperiperi <rhy3756547@hotmail.com> 2020-07-13 11:48:14 +0100
committer GitHub <noreply@github.com> 2020-07-13 20:48:14 +1000
commit d7044b10a253dae31b9a0041a432e3a7adce59f6 (patch)
tree 20947496224af14c43803de8169a6493aa77b45b /ARMeilleure/Instructions/InstEmitHash.cs
parent 30d4f752f47217bcdc4dd05428010acf270189d0 (diff)
Add SSE4.2 path for CRC32, add A32 variant, add tests for non-Castagnoli variants. (#1328)
* Add CRC32 A32 instructions. * Fix CRC32 instructions. * Add CRC intrinsic and fast path. Loop is currently unrolled, will look into adding temp vars after tests are added. * Begin work on CRC tests * Fix SSE4.2 path for CRC32C, finalize tests. * Remove unused IR path. * Fix spacing between prefix checks. * This should be Src. * PTC Version * OpCodeTable Order * Integer check improvement. Value and Crc can be either 32 or 64 size. * This wasn't necessary... * If size is 3, value type must be I64. * Fix same src+dest handling for non-CRC intrinsics. * Pre-fix (ha) issue with VEX encodings
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitHash.cs')
-rw-r--r--ARMeilleure/Instructions/InstEmitHash.cs147
1 files changed, 16 insertions, 131 deletions
diff --git a/ARMeilleure/Instructions/InstEmitHash.cs b/ARMeilleure/Instructions/InstEmitHash.cs
index 2a8b3488..82b3e353 100644
--- a/ARMeilleure/Instructions/InstEmitHash.cs
+++ b/ARMeilleure/Instructions/InstEmitHash.cs
@@ -1,182 +1,67 @@
-// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitHashHelper;
using static ARMeilleure.Instructions.InstEmitHelper;
-using static ARMeilleure.Instructions.InstEmitSimdHelper;
-using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Instructions
{
static partial class InstEmit
{
+ private const int ByteSizeLog2 = 0;
+ private const int HWordSizeLog2 = 1;
+ private const int WordSizeLog2 = 2;
+ private const int DWordSizeLog2 = 3;
+
public static void Crc32b(ArmEmitterContext context)
{
- if (Optimizations.UsePclmulqdq)
- {
- EmitCrc32Optimized(context, false, 8);
- }
- else
- {
- EmitCrc32Call(context, nameof(SoftFallback.Crc32b));
- }
+ EmitCrc32Call(context, ByteSizeLog2, false);
}
public static void Crc32h(ArmEmitterContext context)
{
- if (Optimizations.UsePclmulqdq)
- {
- EmitCrc32Optimized(context, false, 16);
- }
- else
- {
- EmitCrc32Call(context, nameof(SoftFallback.Crc32h));
- }
+ EmitCrc32Call(context, HWordSizeLog2, false);
}
public static void Crc32w(ArmEmitterContext context)
{
- if (Optimizations.UsePclmulqdq)
- {
- EmitCrc32Optimized(context, false, 32);
- }
- else
- {
- EmitCrc32Call(context, nameof(SoftFallback.Crc32w));
- }
+ EmitCrc32Call(context, WordSizeLog2, false);
}
public static void Crc32x(ArmEmitterContext context)
{
- if (Optimizations.UsePclmulqdq)
- {
- EmitCrc32Optimized64(context, false);
- }
- else
- {
- EmitCrc32Call(context, nameof(SoftFallback.Crc32x));
- }
+ EmitCrc32Call(context, DWordSizeLog2, false);
}
public static void Crc32cb(ArmEmitterContext context)
{
- if (Optimizations.UsePclmulqdq)
- {
- EmitCrc32Optimized(context, true, 8);
- }
- else
- {
- EmitCrc32Call(context, nameof(SoftFallback.Crc32cb));
- }
+ EmitCrc32Call(context, ByteSizeLog2, true);
}
public static void Crc32ch(ArmEmitterContext context)
{
- if (Optimizations.UsePclmulqdq)
- {
- EmitCrc32Optimized(context, true, 16);
- }
- else
- {
- EmitCrc32Call(context, nameof(SoftFallback.Crc32ch));
- }
+ EmitCrc32Call(context, HWordSizeLog2, true);
}
public static void Crc32cw(ArmEmitterContext context)
{
- if (Optimizations.UsePclmulqdq)
- {
- EmitCrc32Optimized(context, true, 32);
- }
- else
- {
- EmitCrc32Call(context, nameof(SoftFallback.Crc32cw));
- }
+ EmitCrc32Call(context, WordSizeLog2, true);
}
public static void Crc32cx(ArmEmitterContext context)
{
- if (Optimizations.UsePclmulqdq)
- {
- EmitCrc32Optimized64(context, true);
- }
- else
- {
- EmitCrc32Call(context, nameof(SoftFallback.Crc32cx));
- }
- }
-
- private static void EmitCrc32Optimized(ArmEmitterContext context, bool castagnoli, int bitsize)
- {
- OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
-
- long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
- long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
-
- Operand crc = GetIntOrZR(context, op.Rn);
- Operand data = GetIntOrZR(context, op.Rm);
-
- crc = context.VectorInsert(context.VectorZero(), crc, 0);
-
- switch (bitsize)
- {
- case 8: data = context.VectorInsert8(context.VectorZero(), data, 0); break;
- case 16: data = context.VectorInsert16(context.VectorZero(), data, 0); break;
- case 32: data = context.VectorInsert(context.VectorZero(), data, 0); break;
- }
-
- Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
- tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(64 - bitsize));
- tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(0));
- tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
-
- if (bitsize < 32)
- {
- crc = context.AddIntrinsic(Intrinsic.X86Pslldq, crc, Const((64 - bitsize) / 8));
- tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, crc);
- }
-
- SetIntOrZR(context, op.Rd, context.VectorExtract(OperandType.I32, tmp, 2));
- }
-
- private static void EmitCrc32Optimized64(ArmEmitterContext context, bool castagnoli)
- {
- OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
-
- long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
- long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
-
- Operand crc = GetIntOrZR(context, op.Rn);
- Operand data = GetIntOrZR(context, op.Rm);
-
- crc = context.VectorInsert(context.VectorZero(), crc, 0);
- data = context.VectorInsert(context.VectorZero(), data, 0);
-
- Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
- Operand res = context.AddIntrinsic(Intrinsic.X86Pslldq, tmp, Const(4));
-
- tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, res, X86GetScalar(context, mu), Const(0));
- tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
-
- tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, res);
- tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(32));
-
- tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(1));
- tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
-
- SetIntOrZR(context, op.Rd, context.VectorExtract(OperandType.I32, tmp, 2));
+ EmitCrc32Call(context, DWordSizeLog2, true);
}
- private static void EmitCrc32Call(ArmEmitterContext context, string name)
+ private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c)
{
OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
Operand n = GetIntOrZR(context, op.Rn);
Operand m = GetIntOrZR(context, op.Rm);
- Operand d = context.Call(typeof(SoftFallback).GetMethod(name), n, m);
+ Operand d = EmitCrc32(context, n, m, size, c);
SetIntOrZR(context, op.Rd, d);
}