diff options
author | gdkchan <gab.dark.100@gmail.com> | 2020-03-10 21:49:27 -0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-11 11:49:27 +1100 |
commit | c26f3774bdbf3982149a3ea4c0f7abb4de869db7 (patch) | |
tree | 45805ff76e7a4f486d5132d39ec7f901f462adcb | |
parent | 89ccec197ec9a5db2bb308ef3e9178910d1ab7a8 (diff) |
Implement VMULL, VMLSL, VRSHR, VQRSHRN, VQRSHRUN AArch32 instructions + other fixes (#977)
* Implement VMULL, VMLSL, VQRSHRN, VQRSHRUN AArch32 instructions plus other fixes
* Re-align opcode table
* Re-enable undefined, use subclasses to fix checks
* Add test and fix VRSHR instruction
* PR feedback
-rw-r--r-- | ARMeilleure/Decoders/OpCode32SimdRegElem.cs | 13 | ||||
-rw-r--r-- | ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs | 19 | ||||
-rw-r--r-- | ARMeilleure/Decoders/OpCode32SimdRegLong.cs | 14 | ||||
-rw-r--r-- | ARMeilleure/Decoders/OpCode32SimdShImm.cs | 2 | ||||
-rw-r--r-- | ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs | 7 | ||||
-rw-r--r-- | ARMeilleure/Decoders/OpCodeTable.cs | 284 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitAlu32.cs | 2 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs | 203 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdHelper32.cs | 95 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdShift32.cs | 182 | ||||
-rw-r--r-- | ARMeilleure/Instructions/InstName.cs | 5 | ||||
-rw-r--r-- | Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | 74 | ||||
-rw-r--r-- | Ryujinx.Tests/Cpu/CpuTestSimdRegElem32.cs | 81 | ||||
-rw-r--r-- | Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs | 94 |
14 files changed, 830 insertions, 245 deletions
diff --git a/ARMeilleure/Decoders/OpCode32SimdRegElem.cs b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs index 4bf15cca..c3b912b1 100644 --- a/ARMeilleure/Decoders/OpCode32SimdRegElem.cs +++ b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs @@ -6,13 +6,20 @@ { Q = ((opCode >> 24) & 0x1) != 0; F = ((opCode >> 8) & 0x1) != 0; - Size = ((opCode >> 20) & 0x3); + Size = (opCode >> 20) & 0x3; RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64; - Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + if (Size == 1) + { + Vm = ((opCode >> 3) & 0x1) | ((opCode >> 4) & 0x2) | ((opCode << 2) & 0x1c); + } + else /* if (Size == 2) */ + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + } - if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn) || Size == 0 || (Size == 1 && F)) + if (GetType() == typeof(OpCode32SimdRegElem) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn) || Size == 0 || (Size == 1 && F)) { Instruction = InstDescriptor.Undefined; } diff --git a/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs b/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs new file mode 100644 index 00000000..26f86f34 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegElemLong : OpCode32SimdRegElem + { + public OpCode32SimdRegElemLong(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Q = false; + F = false; + + RegisterSize = RegisterSize.Simd64; + + // (Vd & 1) != 0 || Size == 3 are also invalid, but they are checked on encoding. + if (Size == 0) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdRegLong.cs b/ARMeilleure/Decoders/OpCode32SimdRegLong.cs new file mode 100644 index 00000000..24ae42d8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRegLong.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegLong : OpCode32SimdReg + { + public bool Polynomial { get; private set; } + + public OpCode32SimdRegLong(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Q = false; + RegisterSize = RegisterSize.Simd64; + Polynomial = ((opCode >> 9) & 0x1) != 0; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdShImm.cs b/ARMeilleure/Decoders/OpCode32SimdShImm.cs index b19a601f..53da6bfb 100644 --- a/ARMeilleure/Decoders/OpCode32SimdShImm.cs +++ b/ARMeilleure/Decoders/OpCode32SimdShImm.cs @@ -35,7 +35,7 @@ Instruction = InstDescriptor.Undefined; } - if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + if (GetType() == typeof(OpCode32SimdShImm) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) { Instruction = InstDescriptor.Undefined; } diff --git a/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs b/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs new file mode 100644 index 00000000..80f44142 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdShImmNarrow : OpCode32SimdShImm + { + public OpCode32SimdShImmNarrow(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +} diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index eac31706..4010e3ab 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -752,144 +752,152 @@ namespace ARMeilleure.Decoders SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, typeof(OpCode32AluUx)); // FP & SIMD - SetA32("<<<<11101x110000xxxx10xx11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, typeof(OpCode32SimdRegS)); - SetA32("111100111x11xx01xxxx0x110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, typeof(OpCode32SimdReg)); - SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg)); - SetA32("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, typeof(OpCode32SimdRegS)); - SetA32("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, typeof(OpCode32SimdReg)); - SetA32("111100100x00xxxxxxxx0001xxx1xxxx", InstName.Vand, InstEmit32.Vand_I, typeof(OpCode32SimdBinary)); - SetA32("111100110x11xxxxxxxx0001xxx1xxxx", InstName.Vbif, InstEmit32.Vbif, typeof(OpCode32SimdBinary)); - SetA32("111100110x10xxxxxxxx0001xxx1xxxx", InstName.Vbit, InstEmit32.Vbit, typeof(OpCode32SimdBinary)); - SetA32("111100110x01xxxxxxxx0001xxx1xxxx", InstName.Vbsl, InstEmit32.Vbsl, typeof(OpCode32SimdBinary)); - SetA32("111100110x<<xxxxxxxx1000xxx1xxxx", InstName.Vceq, InstEmit32.Vceq_I, typeof(OpCode32SimdReg)); - SetA32("111100100x00xxxxxxxx1110xxx0xxxx", InstName.Vceq, InstEmit32.Vceq_V, typeof(OpCode32SimdReg)); - SetA32("111100111x11xx01xxxx0x010xx0xxxx", InstName.Vceq, InstEmit32.Vceq_Z, typeof(OpCode32SimdCmpZ)); - SetA32("1111001x0x<<xxxxxxxx0011xxx1xxxx", InstName.Vcge, InstEmit32.Vcge_I, typeof(OpCode32SimdReg)); - SetA32("111100110x00xxxxxxxx1110xxx0xxxx", InstName.Vcge, InstEmit32.Vcge_V, typeof(OpCode32SimdReg)); - SetA32("111100111x11xx01xxxx0x001xx0xxxx", InstName.Vcge, InstEmit32.Vcge_Z, typeof(OpCode32SimdCmpZ)); - SetA32("1111001x0x<<xxxxxxxx0011xxx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_I, typeof(OpCode32SimdReg)); - SetA32("111100110x10xxxxxxxx1110xxx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_V, typeof(OpCode32SimdReg)); - SetA32("111100111x11xx01xxxx0x000xx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_Z, typeof(OpCode32SimdCmpZ)); - SetA32("111100111x11xx01xxxx0x011xx0xxxx", InstName.Vcle, InstEmit32.Vcle_Z, typeof(OpCode32SimdCmpZ)); - SetA32("111100111x11xx01xxxx0x100xx0xxxx", InstName.Vclt, InstEmit32.Vclt_Z, typeof(OpCode32SimdCmpZ)); - SetA32("<<<<11101x11010xxxxx101x01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, typeof(OpCode32SimdS)); - SetA32("<<<<11101x11010xxxxx101x11x0xxxx", InstName.Vcmpe, InstEmit32.Vcmpe, typeof(OpCode32SimdS)); - SetA32("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, typeof(OpCode32SimdS)); // FP 32 and 64, scalar. - SetA32("<<<<11101x11110xxxxx10xx11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // FP32 to int. - SetA32("<<<<11101x111000xxxx10xxx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // Int to FP32. - SetA32("111111101x1111xxxxxx10>>x1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_R, typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp). - SetA32("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, typeof(OpCode32SimdCmpZ)); // FP and integer, vector. - SetA32("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS)); - SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, typeof(OpCode32SimdDupGP)); - SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, typeof(OpCode32SimdDupElem)); - SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, typeof(OpCode32SimdExt)); - SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle)); - SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 1. - SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 2. - SetA32("111101000x10xxxxxxxx0110xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 3. - SetA32("111101000x10xxxxxxxx0010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 4. - SetA32("111101001x10xxxxxxxxxx01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemSingle)); - SetA32("111101000x10xxxxxxxx100xxxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); // Regs = 1, inc = 1/2 (itype). - SetA32("111101000x10xxxxxxxx0011xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); // Regs = 2, inc = 2. - SetA32("111101001x10xxxxxxxxxx10xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemSingle)); - SetA32("111101000x10xxxxxxxx010xxxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). - SetA32("111101001x10xxxxxxxxxx11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle)); - SetA32("111101000x10xxxxxxxx000xxxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). - SetA32("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11010x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11001x01xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11010x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<1101xx01xxxxxxxx101xxxxxxxxx", InstName.Vldr, InstEmit32.Vldr, typeof(OpCode32SimdMemImm)); - SetA32("1111001x0x<<xxxxxxxx0110xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_I, typeof(OpCode32SimdReg)); - SetA32("111100100x00xxxxxxxx1111xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_V, typeof(OpCode32SimdReg)); - SetA32("1111001x0x<<xxxxxxxx0110xxx1xxxx", InstName.Vmin, InstEmit32.Vmin_I, typeof(OpCode32SimdReg)); - SetA32("111100100x10xxxxxxxx1111xxx0xxxx", InstName.Vmin, InstEmit32.Vmin_V, typeof(OpCode32SimdReg)); - SetA32("111111101x00xxxxxxxx10>>x0x0xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_S, typeof(OpCode32SimdRegS)); - SetA32("111100110x0xxxxxxxxx1111xxx1xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_V, typeof(OpCode32SimdReg)); - SetA32("111111101x00xxxxxxxx10>>x1x0xxxx", InstName.Vminnm, InstEmit32.Vminnm_S, typeof(OpCode32SimdRegS)); - SetA32("111100110x1xxxxxxxxx1111xxx1xxxx", InstName.Vminnm, InstEmit32.Vminnm_V, typeof(OpCode32SimdReg)); - SetA32("1111001x1x<<xxxxxxxx000xx1x0xxxx", InstName.Vmla, InstEmit32.Vmla_1, typeof(OpCode32SimdRegElem)); - SetA32("111100100xxxxxxxxxxx1001xxx0xxxx", InstName.Vmla, InstEmit32.Vmla_I, typeof(OpCode32SimdReg)); - SetA32("<<<<11100x00xxxxxxxx101xx0x0xxxx", InstName.Vmla, InstEmit32.Vmla_S, typeof(OpCode32SimdRegS)); - SetA32("111100100x00xxxxxxxx1101xxx1xxxx", InstName.Vmla, InstEmit32.Vmla_V, typeof(OpCode32SimdReg)); - SetA32("1111001x1x<<xxxxxxxx010xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_1, typeof(OpCode32SimdRegElem)); - SetA32("<<<<11100x00xxxxxxxx101xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_S, typeof(OpCode32SimdRegS)); - SetA32("111100100x10xxxxxxxx1101xxx1xxxx", InstName.Vmls, InstEmit32.Vmls_V, typeof(OpCode32SimdReg)); - SetA32("111100110xxxxxxxxxxx1001xxx0xxxx", InstName.Vmls, InstEmit32.Vmls_I, typeof(OpCode32SimdReg)); - SetA32("<<<<11100xx0xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); // From gen purpose. - SetA32("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); // To gen purpose. - SetA32("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, typeof(OpCode32SimdMovGpDouble)); // To/from gen purpose x2 and single precision x2. - SetA32("<<<<1100010xxxxxxxxx101100x1xxxx", InstName.Vmov, InstEmit32.Vmov_GD, typeof(OpCode32SimdMovGpDouble)); // To/from gen purpose x2 and double precision. - SetA32("<<<<1110000xxxxxxxxx1010x0010000", InstName.Vmov, InstEmit32.Vmov_GS, typeof(OpCode32SimdMovGp)); // To/from gen purpose and single precision. - SetA32("1111001x1x000xxxxxxx0xx00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q vector I32. - SetA32("<<<<11101x11xxxxxxxx101x0000xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm44)); // Scalar f16/32/64 based on size 01 10 11. - SetA32("1111001x1x000xxxxxxx10x00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I16. - SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q (dt - from cmode). - SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I64. - SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, typeof(OpCode32SimdS)); - SetA32("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); - SetA32("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); - SetA32("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); - SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ)); - SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial)); - SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial)); - SetA32("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem)); - SetA32("1111001x0xxxxxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg)); - SetA32("<<<<11100x10xxxxxxxx101xx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS)); - SetA32("111100110x00xxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg)); - SetA32("1111001x1x000xxxxxxx0xx00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); // D/Q vector I32. - SetA32("1111001x1x000xxxxxxx10x00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); - SetA32("1111001x1x000xxxxxxx110x0x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); - SetA32("<<<<11101x110001xxxx101x01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, typeof(OpCode32SimdS)); - SetA32("111100111x11xx01xxxx0x111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, typeof(OpCode32Simd)); - SetA32("<<<<11100x01xxxxxxxx101xx1x0xxxx", InstName.Vnmla, InstEmit32.Vnmla_S, typeof(OpCode32SimdRegS)); - SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, typeof(OpCode32SimdRegS)); - SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, typeof(OpCode32SimdRegS)); - SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, typeof(OpCode32SimdBinary)); - SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, typeof(OpCode32SimdImm)); - SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg)); - SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg)); - SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte)); - SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg)); - SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, typeof(OpCode32SimdRev)); - SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, typeof(OpCode32SimdCvtFI)); - SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, typeof(OpCode32SimdCvtFI)); - SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte)); - SetA32("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg)); - SetA32("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, typeof(OpCode32SimdSel)); - SetA32("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShImm)); - SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg)); - SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, typeof(OpCode32SimdShImm)); - SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImm)); - SetA32("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS)); - SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle)); - SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 1. - SetA32("111101000x00xxxxxxxx1010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 2. - SetA32("111101000x00xxxxxxxx0110xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 3. - SetA32("111101000x00xxxxxxxx0010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 4. - SetA32("111101001x00xxxxxxxx<<01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemSingle)); - SetA32("111101000x00xxxxxxxx100xxxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemPair)); // Regs = 1, inc = 1/2 (itype). - SetA32("111101000x00xxxxxxxx0011xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemPair)); // Regs = 2, inc = 2. - SetA32("111101001x00xxxxxxxx<<10xxxxxxxx", InstName.Vst3, InstEmit32.Vst3, typeof(OpCode32SimdMemSingle)); - SetA32("111101000x00xxxxxxxx010xxxxxxxxx", InstName.Vst3, InstEmit32.Vst3, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). - SetA32("111101001x00xxxxxxxx<<11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemSingle)); - SetA32("111101000x00xxxxxxxx000xxxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). - SetA32("<<<<11001x00xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11001x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11001x00xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11001x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); - SetA32("<<<<1101xx00xxxxxxxx101xxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm)); - SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg)); - SetA32("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, typeof(OpCode32SimdRegS)); - SetA32("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg)); - SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, typeof(OpCode32SimdTbl)); - SetA32("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ)); - SetA32("111100111x11<<10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ)); - SetA32("111100111x11<<10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ)); + SetA32("<<<<11101x110000xxxx10xx11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, typeof(OpCode32SimdRegS)); + SetA32("111100111x11xx01xxxx0x110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, typeof(OpCode32SimdReg)); + SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg)); + SetA32("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, typeof(OpCode32SimdReg)); + SetA32("111100100x00xxxxxxxx0001xxx1xxxx", InstName.Vand, InstEmit32.Vand_I, typeof(OpCode32SimdBinary)); + SetA32("111100110x11xxxxxxxx0001xxx1xxxx", InstName.Vbif, InstEmit32.Vbif, typeof(OpCode32SimdBinary)); + SetA32("111100110x10xxxxxxxx0001xxx1xxxx", InstName.Vbit, InstEmit32.Vbit, typeof(OpCode32SimdBinary)); + SetA32("111100110x01xxxxxxxx0001xxx1xxxx", InstName.Vbsl, InstEmit32.Vbsl, typeof(OpCode32SimdBinary)); + SetA32("111100110x<<xxxxxxxx1000xxx1xxxx", InstName.Vceq, InstEmit32.Vceq_I, typeof(OpCode32SimdReg)); + SetA32("111100100x00xxxxxxxx1110xxx0xxxx", InstName.Vceq, InstEmit32.Vceq_V, typeof(OpCode32SimdReg)); + SetA32("111100111x11xx01xxxx0x010xx0xxxx", InstName.Vceq, InstEmit32.Vceq_Z, typeof(OpCode32SimdCmpZ)); + SetA32("1111001x0x<<xxxxxxxx0011xxx1xxxx", InstName.Vcge, InstEmit32.Vcge_I, typeof(OpCode32SimdReg)); + SetA32("111100110x00xxxxxxxx1110xxx0xxxx", InstName.Vcge, InstEmit32.Vcge_V, typeof(OpCode32SimdReg)); + SetA32("111100111x11xx01xxxx0x001xx0xxxx", InstName.Vcge, InstEmit32.Vcge_Z, typeof(OpCode32SimdCmpZ)); + SetA32("1111001x0x<<xxxxxxxx0011xxx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_I, typeof(OpCode32SimdReg)); + SetA32("111100110x10xxxxxxxx1110xxx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_V, typeof(OpCode32SimdReg)); + SetA32("111100111x11xx01xxxx0x000xx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_Z, typeof(OpCode32SimdCmpZ)); + SetA32("111100111x11xx01xxxx0x011xx0xxxx", InstName.Vcle, InstEmit32.Vcle_Z, typeof(OpCode32SimdCmpZ)); + SetA32("111100111x11xx01xxxx0x100xx0xxxx", InstName.Vclt, InstEmit32.Vclt_Z, typeof(OpCode32SimdCmpZ)); + SetA32("<<<<11101x11010xxxxx101x01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, typeof(OpCode32SimdS)); + SetA32("<<<<11101x11010xxxxx101x11x0xxxx", InstName.Vcmpe, InstEmit32.Vcmpe, typeof(OpCode32SimdS)); + SetA32("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, typeof(OpCode32SimdS)); // FP 32 and 64, scalar. + SetA32("<<<<11101x11110xxxxx10xx11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // FP32 to int. + SetA32("<<<<11101x111000xxxx10xxx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // Int to FP32. + SetA32("111111101x1111xxxxxx10>>x1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_R, typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp). + SetA32("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, typeof(OpCode32SimdCmpZ)); // FP and integer, vector. + SetA32("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS)); + SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, typeof(OpCode32SimdDupGP)); + SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, typeof(OpCode32SimdDupElem)); + SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, typeof(OpCode32SimdExt)); + SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 1. + SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 2. + SetA32("111101000x10xxxxxxxx0110xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 3. + SetA32("111101000x10xxxxxxxx0010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 4. + SetA32("111101001x10xxxxxxxxxx01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx100xxxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); // Regs = 1, inc = 1/2 (itype). + SetA32("111101000x10xxxxxxxx0011xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); // Regs = 2, inc = 2. + SetA32("111101001x10xxxxxxxxxx10xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx010xxxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("111101001x10xxxxxxxxxx11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx000xxxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x01xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<1101xx01xxxxxxxx101xxxxxxxxx", InstName.Vldr, InstEmit32.Vldr, typeof(OpCode32SimdMemImm)); + SetA32("1111001x0x<<xxxxxxxx0110xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_I, typeof(OpCode32SimdReg)); + SetA32("111100100x00xxxxxxxx1111xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_V, typeof(OpCode32SimdReg)); + SetA32("1111001x0x<<xxxxxxxx0110xxx1xxxx", InstName.Vmin, InstEmit32.Vmin_I, typeof(OpCode32SimdReg)); + SetA32("111100100x10xxxxxxxx1111xxx0xxxx", InstName.Vmin, InstEmit32.Vmin_V, typeof(OpCode32SimdReg)); + SetA32("111111101x00xxxxxxxx10>>x0x0xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_S, typeof(OpCode32SimdRegS)); + SetA32("111100110x0xxxxxxxxx1111xxx1xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_V, typeof(OpCode32SimdReg)); + SetA32("111111101x00xxxxxxxx10>>x1x0xxxx", InstName.Vminnm, InstEmit32.Vminnm_S, typeof(OpCode32SimdRegS)); + SetA32("111100110x1xxxxxxxxx1111xxx1xxxx", InstName.Vminnm, InstEmit32.Vminnm_V, typeof(OpCode32SimdReg)); + SetA32("1111001x1x<<xxxxxxxx000xx1x0xxxx", InstName.Vmla, InstEmit32.Vmla_1, typeof(OpCode32SimdRegElem)); + SetA32("111100100xxxxxxxxxxx1001xxx0xxxx", InstName.Vmla, InstEmit32.Vmla_I, typeof(OpCode32SimdReg)); + SetA32("<<<<11100x00xxxxxxxx101xx0x0xxxx", InstName.Vmla, InstEmit32.Vmla_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x00xxxxxxxx1101xxx1xxxx", InstName.Vmla, InstEmit32.Vmla_V, typeof(OpCode32SimdReg)); + SetA32("1111001x1x<<xxxxxxxx010xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_1, typeof(OpCode32SimdRegElem)); + SetA32("<<<<11100x00xxxxxxxx101xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x10xxxxxxxx1101xxx1xxxx", InstName.Vmls, InstEmit32.Vmls_V, typeof(OpCode32SimdReg)); + SetA32("111100110xxxxxxxxxxx1001xxx0xxxx", InstName.Vmls, InstEmit32.Vmls_I, typeof(OpCode32SimdReg)); + SetA32("1111001x1x<<xxxxxxx01010x0x0xxxx", InstName.Vmlsl, InstEmit32.Vmlsl_I, typeof(OpCode32SimdRegLong)); + SetA32("<<<<11100xx0xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); // From gen purpose. + SetA32("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); // To gen purpose. + SetA32("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, typeof(OpCode32SimdMovGpDouble)); // To/from gen purpose x2 and single precision x2. + SetA32("<<<<1100010xxxxxxxxx101100x1xxxx", InstName.Vmov, InstEmit32.Vmov_GD, typeof(OpCode32SimdMovGpDouble)); // To/from gen purpose x2 and double precision. + SetA32("<<<<1110000xxxxxxxxx1010x0010000", InstName.Vmov, InstEmit32.Vmov_GS, typeof(OpCode32SimdMovGp)); // To/from gen purpose and single precision. + SetA32("1111001x1x000xxxxxxx0xx00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q vector I32. + SetA32("<<<<11101x11xxxxxxxx101x0000xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm44)); // Scalar f16/32/64 based on size 01 10 11. + SetA32("1111001x1x000xxxxxxx10x00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I16. + SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q (dt - from cmode). + SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I64. + SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, typeof(OpCode32SimdS)); + SetA32("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); + SetA32("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); + SetA32("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); + SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ)); + SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial)); + SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial)); + SetA32("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem)); + SetA32("111100100x<<xxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg)); + SetA32("111100110x00xxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg)); + SetA32("<<<<11100x10xxxxxxxx101xx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS)); + SetA32("111100110x00xxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg)); + SetA32("1111001x1x<<xxxxxxx01010x1x0xxxx", InstName.Vmull, InstEmit32.Vmull_1, typeof(OpCode32SimdRegElemLong)); + SetA32("1111001x1x<<xxxxxxx01100x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, typeof(OpCode32SimdRegLong)); + SetA32("111100101x00xxxxxxx01110x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, typeof(OpCode32SimdRegLong)); // Polynomial + SetA32("1111001x1x000xxxxxxx0xx00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); // D/Q vector I32. + SetA32("1111001x1x000xxxxxxx10x00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); + SetA32("1111001x1x000xxxxxxx110x0x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); + SetA32("<<<<11101x110001xxxx101x01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, typeof(OpCode32SimdS)); + SetA32("111100111x11xx01xxxx0x111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, typeof(OpCode32Simd)); + SetA32("<<<<11100x01xxxxxxxx101xx1x0xxxx", InstName.Vnmla, InstEmit32.Vnmla_S, typeof(OpCode32SimdRegS)); + SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, typeof(OpCode32SimdRegS)); + SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, typeof(OpCode32SimdBinary)); + SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, typeof(OpCode32SimdImm)); + SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg)); + SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg)); + SetA32("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, typeof(OpCode32SimdShImmNarrow)); + SetA32("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, typeof(OpCode32SimdShImmNarrow)); + SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte)); + SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg)); + SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, typeof(OpCode32SimdRev)); + SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, typeof(OpCode32SimdCvtFI)); + SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, typeof(OpCode32SimdCvtFI)); + SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, typeof(OpCode32SimdShImm)); + SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte)); + SetA32("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg)); + SetA32("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, typeof(OpCode32SimdSel)); + SetA32("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShImm)); + SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg)); + SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, typeof(OpCode32SimdShImm)); + SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImmNarrow)); + SetA32("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS)); + SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 1. + SetA32("111101000x00xxxxxxxx1010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 2. + SetA32("111101000x00xxxxxxxx0110xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 3. + SetA32("111101000x00xxxxxxxx0010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 4. + SetA32("111101001x00xxxxxxxx<<01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx100xxxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemPair)); // Regs = 1, inc = 1/2 (itype). + SetA32("111101000x00xxxxxxxx0011xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemPair)); // Regs = 2, inc = 2. + SetA32("111101001x00xxxxxxxx<<10xxxxxxxx", InstName.Vst3, InstEmit32.Vst3, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx010xxxxxxxxx", InstName.Vst3, InstEmit32.Vst3, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("111101001x00xxxxxxxx<<11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx000xxxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("<<<<11001x00xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x00xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<1101xx00xxxxxxxx101xxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm)); + SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg)); + SetA32("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg)); + SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, typeof(OpCode32SimdTbl)); + SetA32("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ)); + SetA32("111100111x11<<10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ)); + SetA32("111100111x11<<10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ)); #endregion FillFastLookupTable(_instA32FastLookup, _allInstA32); diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs index 469a18a0..d57ff0b6 100644 --- a/ARMeilleure/Instructions/InstEmitAlu32.cs +++ b/ARMeilleure/Instructions/InstEmitAlu32.cs @@ -128,8 +128,6 @@ namespace ARMeilleure.Instructions public static void Cmp(ArmEmitterContext context) { - IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; - Operand n = GetAluN(context); Operand m = GetAluM(context, setCarry: false); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index 73f25b98..19a10f68 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -2,6 +2,7 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Translation; using System; +using System.Diagnostics; using static ARMeilleure.Instructions.InstEmitFlowHelper; using static ARMeilleure.Instructions.InstEmitHelper; @@ -113,20 +114,13 @@ namespace ARMeilleure.Instructions Operand insert = GetIntA32(context, op.Rt); // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts. - switch (op.Size) + insert = op.Size switch { - case 2: - insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)); - break; - case 1: - insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)); - break; - case 0: - insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)); - break; - default: - throw new InvalidOperationException("Unknown Vdup Size."); - } + 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)), + 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)), + 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)), + _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".") + }; InsertScalar(context, op.Vd, insert); if (op.Q) @@ -142,20 +136,13 @@ namespace ARMeilleure.Instructions Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size); // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts. - switch (op.Size) + insert = op.Size switch { - case 2: - insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)); - break; - case 1: - insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)); - break; - case 0: - insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)); - break; - default: - throw new InvalidOperationException("Unknown Vdup Size."); - } + 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)), + 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)), + 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)), + _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".") + }; InsertScalar(context, op.Vd, insert); if (op.Q) @@ -575,51 +562,53 @@ namespace ARMeilleure.Instructions } } - public static void Vmul_S(ArmEmitterContext context) + public static void Vmla_S(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd); } else if (Optimizations.FastFP) { - EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); } else { - EmitScalarBinaryOpF32(context, (op1, op2) => + EmitScalarTernaryOpF32(context, (op1, op2, op3) => { - return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); + return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3); }); } } - public static void Vmul_V(ArmEmitterContext context) + public static void Vmla_V(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); + EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd); } else if (Optimizations.FastFP) { - EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); } else { - EmitVectorBinaryOpF32(context, (op1, op2) => + EmitVectorTernaryOpF32(context, (op1, op2, op3) => { - return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2); + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3); }); } } - public static void Vmul_I(ArmEmitterContext context) + public static void Vmla_I(ArmEmitterContext context) { - if ((context.CurrOp as OpCode32SimdReg).U) throw new NotImplementedException("Polynomial mode not implemented"); - EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2)); + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); } - public static void Vmul_1(ArmEmitterContext context) + public static void Vmla_1(ArmEmitterContext context) { OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; @@ -627,70 +616,70 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); + EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd); } else if (Optimizations.FastFP) { - EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); } else { - EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2)); + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3)); } } else { - EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false); + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false); } } - public static void Vmla_S(ArmEmitterContext context) + public static void Vmls_S(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd); + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd); } else if (Optimizations.FastFP) { EmitScalarTernaryOpF32(context, (op1, op2, op3) => { - return context.Add(op1, context.Multiply(op2, op3)); + return context.Subtract(op1, context.Multiply(op2, op3)); }); } else { EmitScalarTernaryOpF32(context, (op1, op2, op3) => { - return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3); + return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3); }); } } - public static void Vmla_V(ArmEmitterContext context) + public static void Vmls_V(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd); + EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd); } else if (Optimizations.FastFP) { - EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); } else { EmitVectorTernaryOpF32(context, (op1, op2, op3) => { - return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3); + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3); }); } } - public static void Vmla_I(ArmEmitterContext context) + public static void Vmls_I(ArmEmitterContext context) { - EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); } - public static void Vmla_1(ArmEmitterContext context) + public static void Vmls_1(ArmEmitterContext context) { OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; @@ -698,70 +687,83 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd); + EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd); } else if (Optimizations.FastFP) { - EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); } else { - EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3)); + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3)); } } else { - EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false); + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false); } } - public static void Vmls_S(ArmEmitterContext context) + public static void Vmlsl_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorTernaryLongOpI32(context, (opD, op1, op2) => context.Subtract(opD, context.Multiply(op1, op2)), !op.U); + } + + public static void Vmul_S(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd); + EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); } else if (Optimizations.FastFP) { - EmitScalarTernaryOpF32(context, (op1, op2, op3) => - { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); } else { - EmitScalarTernaryOpF32(context, (op1, op2, op3) => + EmitScalarBinaryOpF32(context, (op1, op2) => { - return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3); + return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); }); } } - public static void Vmls_V(ArmEmitterContext context) + public static void Vmul_V(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd); + EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); } else if (Optimizations.FastFP) { - EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); } else { - EmitVectorTernaryOpF32(context, (op1, op2, op3) => + EmitVectorBinaryOpF32(context, (op1, op2) => { - return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3); + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2); }); } } - public static void Vmls_I(ArmEmitterContext context) + public static void Vmul_I(ArmEmitterContext context) { - EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) // This instruction is always signed, U indicates polynomial mode. + { + EmitVectorBinaryOpZx32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2)); + } } - public static void Vmls_1(ArmEmitterContext context) + public static void Vmul_1(ArmEmitterContext context) { OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; @@ -769,20 +771,41 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd); + EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); } else if (Optimizations.FastFP) { - EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2)); } else { - EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3)); + EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2)); } } else { - EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false); + EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false); + } + } + + public static void Vmull_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + EmitVectorByScalarLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U); + } + + public static void Vmull_I(ArmEmitterContext context) + { + OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp; + + if (op.Polynomial) + { + EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false); + } + else + { + EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U); } } @@ -1157,5 +1180,27 @@ namespace ARMeilleure.Instructions EmitVectorBinaryOpSimd32(context, genericEmit); } } + + private static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize) + { + Debug.Assert(eSize <= 32); + + Operand result = eSize == 32 ? Const(0L) : Const(0); + + if (eSize == 32) + { + op1 = context.ZeroExtend32(OperandType.I64, op1); + op2 = context.ZeroExtend32(OperandType.I64, op2); + } + + for (int i = 0; i < eSize; i++) + { + Operand mask = context.BitwiseAnd(op1, Const(op1.Type, 1L << i)); + + result = context.BitwiseExclusiveOr(result, context.Multiply(op2, mask)); + } + + return result; + } } } diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index d0114e7b..67dc25c7 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -57,7 +57,6 @@ namespace ARMeilleure.Instructions // From dreg. vec = GetVecA32(reg >> 1); insert = context.VectorInsert(vec, value, reg & 1); - } else { @@ -69,6 +68,11 @@ namespace ARMeilleure.Instructions context.Copy(vec, insert); } + public static Operand ExtractElement(ArmEmitterContext context, int reg, int size, bool signed) + { + return EmitVectorExtract32(context, reg >> (4 - size), reg & ((16 >> size) - 1), size, signed); + } + public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit) { IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp; @@ -250,6 +254,57 @@ namespace ARMeilleure.Instructions context.Copy(GetVecA32(op.Qd), res); } + public static void EmitVectorBinaryLongOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + if (op.Size == 2) + { + ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne); + me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me); + } + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + if (op.Size == 2) + { + ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne); + me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me); + } + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed) { OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; @@ -330,7 +385,7 @@ namespace ARMeilleure.Instructions { OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; - Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); + Operand m = ExtractElement(context, op.Vm, op.Size, signed); Operand res = GetVecA32(op.Qd); @@ -340,7 +395,37 @@ namespace ARMeilleure.Instructions { Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); - res = EmitVectorInsert(context, res, emit(ne, m), op.In + index, op.Size); + res = EmitVectorInsert(context, res, emit(ne, m), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorByScalarLongOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand m = ExtractElement(context, op.Vm, op.Size, signed); + + if (op.Size == 2) + { + m = signed ? context.SignExtend32(OperandType.I64, m) : context.ZeroExtend32(OperandType.I64, m); + } + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + + if (op.Size == 2) + { + ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne); + } + + res = EmitVectorInsert(context, res, emit(ne, m), index, op.Size + 1); } context.Copy(GetVecA32(op.Qd), res); @@ -454,7 +539,7 @@ namespace ARMeilleure.Instructions // Narrow - public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit) + public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false) { OpCode32Simd op = (OpCode32Simd)context.CurrOp; @@ -465,7 +550,7 @@ namespace ARMeilleure.Instructions for (int index = 0; index < elems; index++) { - Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, false); + Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, signed); res = EmitVectorInsert(context, res, emit(m), id + index, op.Size); } diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs index 89385476..e57c92a3 100644 --- a/ARMeilleure/Instructions/InstEmitSimdShift32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs @@ -1,5 +1,6 @@ using ARMeilleure.Decoders; using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; using ARMeilleure.Translation; using System; using System.Diagnostics; @@ -11,6 +12,78 @@ namespace ARMeilleure.Instructions { static partial class InstEmit32 { + public static void Vqrshrn(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + EmitRoundShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Vqrshrun(ArmEmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Vrshr(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = GetImmShr(op); + long roundConst = 1L << (shift - 1); + + if (op.U) + { + if (op.Size < 2) + { + EmitVectorUnaryOpZx32(context, (op1) => + { + op1 = context.Add(op1, Const(op1.Type, roundConst)); + + return context.ShiftRightUI(op1, Const(shift)); + }); + } + else if (op.Size == 2) + { + EmitVectorUnaryOpZx32(context, (op1) => + { + op1 = context.ZeroExtend32(OperandType.I64, op1); + op1 = context.Add(op1, Const(op1.Type, roundConst)); + + return context.ConvertI64ToI32(context.ShiftRightUI(op1, Const(shift))); + }); + } + else /* if (op.Size == 3) */ + { + EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: false, roundConst, shift)); + } + } + else + { + if (op.Size < 2) + { + EmitVectorUnaryOpSx32(context, (op1) => + { + op1 = context.Add(op1, Const(op1.Type, roundConst)); + + return context.ShiftRightSI(op1, Const(shift)); + }); + } + else if (op.Size == 2) + { + EmitVectorUnaryOpSx32(context, (op1) => + { + op1 = context.SignExtend32(OperandType.I64, op1); + op1 = context.Add(op1, Const(op1.Type, roundConst)); + + return context.ConvertI64ToI32(context.ShiftRightSI(op1, Const(shift))); + }); + } + else /* if (op.Size == 3) */ + { + EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: true, roundConst, shift)); + } + } + } + public static void Vshl(ArmEmitterContext context) { OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; @@ -35,7 +108,7 @@ namespace ARMeilleure.Instructions public static void Vshr(ArmEmitterContext context) { OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; - int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped. + int shift = GetImmShr(op); int maxShift = (8 << op.Size) - 1; if (op.U) @@ -51,7 +124,7 @@ namespace ARMeilleure.Instructions public static void Vshrn(ArmEmitterContext context) { OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; - int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped. + int shift = GetImmShr(op); EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift))); } @@ -96,5 +169,110 @@ namespace ARMeilleure.Instructions return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res)); } } + + [Flags] + private enum ShrImmSaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + Round = 1 << 3, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags); + } + + private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; + bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0; + + if (scalar) + { + // TODO: Support scalar operation. + throw new NotImplementedException(); + } + + int shift = GetImmShr(op); + long roundConst = 1L << (shift - 1); + + EmitVectorUnaryNarrowOp32(context, (op1) => + { + if (op.Size <= 1 || !round) + { + if (round) + { + op1 = context.Add(op1, Const(op1.Type, roundConst)); + } + + op1 = signedSrc ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift)); + } + else /* if (op.Size == 2 && round) */ + { + op1 = EmitShrImm64(context, op1, signedSrc, roundConst, shift); // shift <= 32 + } + + return EmitSatQ(context, op1, 8 << op.Size, signedDst); + }, signedSrc); + } + + private static int GetImmShr(OpCode32SimdShImm op) + { + return (8 << op.Size) - op.Shift; // Shr amount is flipped. + } + + // dst64 = (Int(src64, signed) + roundConst) >> shift; + private static Operand EmitShrImm64( + ArmEmitterContext context, + Operand value, + bool signed, + long roundConst, + int shift) + { + Delegate dlg = signed + ? (Delegate)new _S64_S64_S64_S32(SoftFallback.SignedShrImm64) + : (Delegate)new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64); + + return context.Call(dlg, value, Const(roundConst), Const(shift)); + } + + private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signed) + { + Debug.Assert(eSize <= 32); + + long intMin = signed ? -(1L << (eSize - 1)) : 0; + long intMax = signed ? (1L << (eSize - 1)) - 1 : (1L << eSize) - 1; + + Operand gt = context.ICompareGreater(value, Const(value.Type, intMax)); + Operand lt = context.ICompareLess(value, Const(value.Type, intMin)); + + value = context.ConditionalSelect(gt, Const(value.Type, intMax), value); + value = context.ConditionalSelect(lt, Const(value.Type, intMin), value); + + Operand lblNoSat = Label(); + + context.BranchIfFalse(lblNoSat, context.BitwiseOr(gt, lt)); + + // TODO: Set QC (to 1) on FPSCR here. + + context.MarkLabel(lblNoSat); + + return value; + } } } diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index 69969e9f..5e92da0a 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -560,12 +560,14 @@ namespace ARMeilleure.Instructions Vminnm, Vmla, Vmls, + Vmlsl, Vmov, Vmovl, Vmovn, Vmrs, Vmsr, Vmul, + Vmull, Vmvn, Vneg, Vnmul, @@ -573,8 +575,11 @@ namespace ARMeilleure.Instructions Vnmls, Vorr, Vpadd, + Vqrshrn, + Vqrshrun, Vrev, Vrint, + Vrshr, Vsel, Vshl, Vshr, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs index a3ba9369..66db63bc 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -256,7 +256,7 @@ namespace Ryujinx.Tests.Cpu { opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); - } + } else { opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5); @@ -284,6 +284,78 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise, Description("VMLSL.<type><size> <Vd>, <Vn>, <Vm>")] + public void Vmlsl_I([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [Values(0u, 1u, 2u)] uint size, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool u) + { + uint opcode = 0xf2800a00u; // VMLSL.S8 Q0, D0, D0 + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + opcode |= size << 20; + + if (u) + { + opcode |= 1 << 24; + } + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VMULL.<size> <Vd>, <Vn>, <Vm>")] + public void Vmull_I([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [Values(0u, 1u, 2u)] uint size, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool op, + [Values] bool u) + { + uint opcode = 0xf2800c00u; // VMULL.S8 Q0, D0, D0 + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + if (op) + { + opcode |= 1 << 9; + size = 0; + u = false; + } + + opcode |= size << 20; + + if (u) + { + opcode |= 1 << 24; + } + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, <Vn>")] public void Vshl([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem32.cs new file mode 100644 index 00000000..e1b367ed --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem32.cs @@ -0,0 +1,81 @@ +#define SimdRegElem32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdRegElem32")] + public sealed class CpuTestSimdRegElem32 : CpuTest32 + { +#if SimdRegElem32 + private const int RndCnt = 2; + + [Test, Pairwise, Description("VMUL.<size> {<Vd>}, <Vn>, <Vm>[<index>]")] + public void Vmul_1I([Values(1u, 0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(26u, 25u, 10u, 9u, 2u, 0u)] uint rm, + [Values(1u, 2u)] uint size, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool q) + { + uint opcode = 0xf2900840u & ~(3u << 20); // VMUL.I16 D0, D0, D0[0] + if (q) + { + opcode |= 1 << 24; + rn <<= 1; + rd <<= 1; + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + opcode |= size << 20; + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VMULL.<size> <Vd>, <Vn>, <Vm>[<index>]")] + public void Vmull_1([Values(2u, 0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(26u, 25u, 10u, 9u, 2u, 0u)] uint rm, + [Values(1u, 2u)] uint size, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool u) + { + uint opcode = 0xf2900a40u & ~(3u << 20); // VMULL.S16 Q0, D0, D0[0] + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + opcode |= size << 20; + + if (u) + { + opcode |= 1 << 24; + } + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs index 6c7b0493..aad4a2a5 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs @@ -11,17 +11,19 @@ namespace Ryujinx.Tests.Cpu #if SimdShImm32 private const int RndCnt = 2; - [Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, #<imm>")] - public void Vshl_Imm([Values(0u)] uint rd, + [Test, Pairwise] + public void Vrshr_Vshr_Imm([Values(0u)] uint rd, [Values(2u, 0u)] uint rm, [Values(0u, 1u, 2u, 3u)] uint size, [Random(RndCnt), Values(0u)] uint shiftImm, [Random(RndCnt)] ulong z, [Random(RndCnt)] ulong a, [Random(RndCnt)] ulong b, - [Values] bool q) + [Values] bool u, + [Values] bool q, + [Values] bool round) { - uint opcode = 0xf2800510u; // VORR.I32 D0, #0 (immediate value changes it into SHL) + uint opcode = 0xf2800010u; // VMOV.I32 D0, #0 (immediate value changes it into SHR) if (q) { opcode |= 1 << 6; @@ -29,6 +31,16 @@ namespace Ryujinx.Tests.Cpu rd <<= 1; } + if (round) + { + opcode |= 1 << 9; // Turn into VRSHR + } + + if (u) + { + opcode |= 1 << 24; + } + uint imm = 1u << ((int)size + 3); imm |= shiftImm & (imm - 1); @@ -45,18 +57,17 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("VSHR.<size> {<Vd>}, <Vm>, #<imm>")] - public void Vshr_Imm([Values(0u)] uint rd, + [Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, #<imm>")] + public void Vshl_Imm([Values(0u)] uint rd, [Values(2u, 0u)] uint rm, [Values(0u, 1u, 2u, 3u)] uint size, [Random(RndCnt), Values(0u)] uint shiftImm, [Random(RndCnt)] ulong z, [Random(RndCnt)] ulong a, [Random(RndCnt)] ulong b, - [Values] bool u, [Values] bool q) { - uint opcode = 0xf2800010u; // VMOV.I32 D0, #0 (immediate value changes it into SHR) + uint opcode = 0xf2800510u; // VORR.I32 D0, #0 (immediate value changes it into SHL) if (q) { opcode |= 1 << 6; @@ -64,11 +75,6 @@ namespace Ryujinx.Tests.Cpu rd <<= 1; } - if (u) - { - opcode |= 1 << 24; - } - uint imm = 1u << ((int)size + 3); imm |= shiftImm & (imm - 1); @@ -85,7 +91,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("VSHRN.<size> {<Vd>}, <Vm>, #<imm>")] + [Test, Pairwise, Description("VSHRN.<size> <Vd>, <Vm>, #<imm>")] public void Vshrn_Imm([Values(0u, 1u)] uint rd, [Values(2u, 0u)] uint rm, [Values(0u, 1u, 2u)] uint size, @@ -111,6 +117,66 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Test, Pairwise, Description("VQRSHRN.<type><size> <Vd>, <Vm>, #<imm>")] + public void Vqrshrn_Imm([Values(0u, 1u)] uint rd, + [Values(2u, 0u)] uint rm, + [Values(0u, 1u, 2u)] uint size, + [Random(RndCnt), Values(0u)] uint shiftImm, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool u) + { + uint opcode = 0xf2800950u; // VORR.I16 Q0, #0 (immediate value changes it into QRSHRN) + + uint imm = 1u << ((int)size + 3); + imm |= shiftImm & (imm - 1); + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((imm & 0x3f) << 16); + + if (u) + { + opcode |= 1u << 24; + } + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VQRSHRUN.<type><size> <Vd>, <Vm>, #<imm>")] + public void Vqrshrun_Imm([Values(0u, 1u)] uint rd, + [Values(2u, 0u)] uint rm, + [Values(0u, 1u, 2u)] uint size, + [Random(RndCnt), Values(0u)] uint shiftImm, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b) + { + uint opcode = 0xf3800850u; // VMOV.I16 Q0, #0x80 (immediate value changes it into QRSHRUN) + + uint imm = 1u << ((int)size + 3); + imm |= shiftImm & (imm - 1); + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((imm & 0x3f) << 16); + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } #endif } } |