diff options
author | riperiperi <rhy3756547@hotmail.com> | 2020-07-13 11:48:14 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-13 20:48:14 +1000 |
commit | d7044b10a253dae31b9a0041a432e3a7adce59f6 (patch) | |
tree | 20947496224af14c43803de8169a6493aa77b45b /ARMeilleure/Instructions/InstEmitHash.cs | |
parent | 30d4f752f47217bcdc4dd05428010acf270189d0 (diff) |
Add SSE4.2 Path for CRC32, add A32 variant, add tests for non-castagnoli variants. (#1328)
* Add CRC32 A32 instructions.
* Fix CRC32 instructions.
* Add CRC intrinsic and fast path.
Loop is currently unrolled; will look into adding temp vars after tests are added.
* Begin work on Crc tests
* Fix SSE4.2 path for CRC32C, finalize tests.
* Remove unused IR path.
* Fix spacing between prefix checks.
* This should be Src.
* PTC Version
* OpCodeTable Order
* Integer check improvement. Value and Crc can each be either 32 or 64 bits in size.
* This wasn't necessary...
* If size is 3, value type must be I64.
* Fix same src+dest handling for non crc intrinsics.
* Pre-fix (ha) issue with vex encodings
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitHash.cs')
-rw-r--r-- | ARMeilleure/Instructions/InstEmitHash.cs | 147 |
1 files changed, 16 insertions, 131 deletions
diff --git a/ARMeilleure/Instructions/InstEmitHash.cs b/ARMeilleure/Instructions/InstEmitHash.cs index 2a8b3488..82b3e353 100644 --- a/ARMeilleure/Instructions/InstEmitHash.cs +++ b/ARMeilleure/Instructions/InstEmitHash.cs @@ -1,182 +1,67 @@ -// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf - using ARMeilleure.Decoders; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Translation; +using static ARMeilleure.Instructions.InstEmitHashHelper; using static ARMeilleure.Instructions.InstEmitHelper; -using static ARMeilleure.Instructions.InstEmitSimdHelper; -using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions { static partial class InstEmit { + private const int ByteSizeLog2 = 0; + private const int HWordSizeLog2 = 1; + private const int WordSizeLog2 = 2; + private const int DWordSizeLog2 = 3; + public static void Crc32b(ArmEmitterContext context) { - if (Optimizations.UsePclmulqdq) - { - EmitCrc32Optimized(context, false, 8); - } - else - { - EmitCrc32Call(context, nameof(SoftFallback.Crc32b)); - } + EmitCrc32Call(context, ByteSizeLog2, false); } public static void Crc32h(ArmEmitterContext context) { - if (Optimizations.UsePclmulqdq) - { - EmitCrc32Optimized(context, false, 16); - } - else - { - EmitCrc32Call(context, nameof(SoftFallback.Crc32h)); - } + EmitCrc32Call(context, HWordSizeLog2, false); } public static void Crc32w(ArmEmitterContext context) { - if (Optimizations.UsePclmulqdq) - { - EmitCrc32Optimized(context, false, 32); - } - else - { - EmitCrc32Call(context, nameof(SoftFallback.Crc32w)); - } + EmitCrc32Call(context, WordSizeLog2, false); } public static void Crc32x(ArmEmitterContext context) { - if (Optimizations.UsePclmulqdq) - { - EmitCrc32Optimized64(context, false); - } - else - { - EmitCrc32Call(context, nameof(SoftFallback.Crc32x)); - } + EmitCrc32Call(context, DWordSizeLog2, false); } public static 
void Crc32cb(ArmEmitterContext context) { - if (Optimizations.UsePclmulqdq) - { - EmitCrc32Optimized(context, true, 8); - } - else - { - EmitCrc32Call(context, nameof(SoftFallback.Crc32cb)); - } + EmitCrc32Call(context, ByteSizeLog2, true); } public static void Crc32ch(ArmEmitterContext context) { - if (Optimizations.UsePclmulqdq) - { - EmitCrc32Optimized(context, true, 16); - } - else - { - EmitCrc32Call(context, nameof(SoftFallback.Crc32ch)); - } + EmitCrc32Call(context, HWordSizeLog2, true); } public static void Crc32cw(ArmEmitterContext context) { - if (Optimizations.UsePclmulqdq) - { - EmitCrc32Optimized(context, true, 32); - } - else - { - EmitCrc32Call(context, nameof(SoftFallback.Crc32cw)); - } + EmitCrc32Call(context, WordSizeLog2, true); } public static void Crc32cx(ArmEmitterContext context) { - if (Optimizations.UsePclmulqdq) - { - EmitCrc32Optimized64(context, true); - } - else - { - EmitCrc32Call(context, nameof(SoftFallback.Crc32cx)); - } - } - - private static void EmitCrc32Optimized(ArmEmitterContext context, bool castagnoli, int bitsize) - { - OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; - - long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))' - long polynomial = castagnoli ? 
0x105EC76F0 : 0x1DB710641; // P'(x) << 1 - - Operand crc = GetIntOrZR(context, op.Rn); - Operand data = GetIntOrZR(context, op.Rm); - - crc = context.VectorInsert(context.VectorZero(), crc, 0); - - switch (bitsize) - { - case 8: data = context.VectorInsert8(context.VectorZero(), data, 0); break; - case 16: data = context.VectorInsert16(context.VectorZero(), data, 0); break; - case 32: data = context.VectorInsert(context.VectorZero(), data, 0); break; - } - - Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data); - tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(64 - bitsize)); - tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(0)); - tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0)); - - if (bitsize < 32) - { - crc = context.AddIntrinsic(Intrinsic.X86Pslldq, crc, Const((64 - bitsize) / 8)); - tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, crc); - } - - SetIntOrZR(context, op.Rd, context.VectorExtract(OperandType.I32, tmp, 2)); - } - - private static void EmitCrc32Optimized64(ArmEmitterContext context, bool castagnoli) - { - OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; - - long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))' - long polynomial = castagnoli ? 
0x105EC76F0 : 0x1DB710641; // P'(x) << 1 - - Operand crc = GetIntOrZR(context, op.Rn); - Operand data = GetIntOrZR(context, op.Rm); - - crc = context.VectorInsert(context.VectorZero(), crc, 0); - data = context.VectorInsert(context.VectorZero(), data, 0); - - Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data); - Operand res = context.AddIntrinsic(Intrinsic.X86Pslldq, tmp, Const(4)); - - tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, res, X86GetScalar(context, mu), Const(0)); - tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0)); - - tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, res); - tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(32)); - - tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(1)); - tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0)); - - SetIntOrZR(context, op.Rd, context.VectorExtract(OperandType.I32, tmp, 2)); + EmitCrc32Call(context, DWordSizeLog2, true); } - private static void EmitCrc32Call(ArmEmitterContext context, string name) + private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c) { OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; Operand n = GetIntOrZR(context, op.Rn); Operand m = GetIntOrZR(context, op.Rm); - Operand d = context.Call(typeof(SoftFallback).GetMethod(name), n, m); + Operand d = EmitCrc32(context, n, m, size, c); SetIntOrZR(context, op.Rd, d); } |