diff options
Diffstat (limited to 'ARMeilleure/CodeGen')
21 files changed, 5868 insertions, 60 deletions
// diff --git a/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs b/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
// new file mode 100644
// index 00000000..fdd4d024
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
// @@ -0,0 +1,270 @@
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Pre-codegen pass that hoists awkward Add constants into locals and folds
    /// address arithmetic feeding Load/Store nodes into memory operands.
    /// </summary>
    static class Arm64Optimizer
    {
        private const int MaxConstantUses = 10000;

        public static void RunPass(ControlFlowGraph cfg)
        {
            // Copies already materialized for a constant value. Emptied at the start of every block.
            var materialized = new Dictionary<ulong, Operand>();

            Operand GetConstantCopy(BasicBlock owner, Operation insertionPoint, Operand source)
            {
                // A cached copy is only reused while its use count stays at or below the cap.
                // Heavily used constants get a fresh mov, which keeps the uses count
                // (limited to 16 bits) from overflowing.
                bool canReuse = materialized.TryGetValue(source.Value, out var cached) &&
                                cached.UsesCount <= MaxConstantUses;

                if (canReuse)
                {
                    return cached;
                }

                Operand fresh = Local(source.Type);

                Operation copyOp = Operation(Instruction.Copy, fresh, source);

                owner.Operations.AddBefore(insertionPoint, copyOp);

                materialized[source.Value] = fresh;

                return fresh;
            }

            // True for constant sources that are relocatable or rejected by ConstTooLong.
            bool NeedsCopy(Operand source)
            {
                return source.Kind == OperandKind.Constant &&
                       (source.Relocatable || ConstTooLong(source, OperandType.I32));
            }

            BasicBlock current = cfg.Blocks.First;

            while (current != null)
            {
                materialized.Clear();

                Operation node = current.Operations.First;

                while (node != default)
                {
                    // Captured before any insertion happens around the current node.
                    Operation following = node.ListNext;

                    // Replace Add sources that are relocatable or fail the ConstTooLong check
                    // with a local holding a copy of the constant.
                    // Doing this early unblocks a few optimizations.
                    if (node.Instruction == Instruction.Add)
                    {
                        Operand first = node.GetSource(0);
                        Operand second = node.GetSource(1);

                        if (NeedsCopy(first))
                        {
                            node.SetSource(0, GetConstantCopy(current, node, first));
                        }

                        if (NeedsCopy(second))
                        {
                            node.SetSource(1, GetConstantCopy(current, node, second));
                        }
                    }

                    // Try to fold something like:
                    //  lsl x1, x1, #2
                    //  add x0, x0, x1
                    //  ldr x0, [x0]
                    //  add x2, x2, #16
                    //  ldr x2, [x2]
                    // Into:
                    //  ldr x0, [x0, x1, lsl #2]
                    //  ldr x2, [x2, #16]
                    if (IsMemoryLoadOrStore(node.Instruction))
                    {
                        // The access type comes from the destination when there is one,
                        // otherwise from the second source.
                        OperandType accessType = node.Destination != default
                            ? node.Destination.Type
                            : node.GetSource(1).Type;

                        Operand folded = GetMemoryOperandOrNull(node.GetSource(0), accessType);

                        if (folded != default)
                        {
                            node.SetSource(0, folded);
                        }
                    }

                    node = following;
                }

                current = current.ListNext;
            }

            Optimizer.RemoveUnusedNodes(cfg);
        }

        // Builds a memory operand for addr when it is defined by a foldable Add.
        // Returns default when nothing can be folded.
        private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
        {
            Operand baseOp = addr;

            // Case 1: local + encodable immediate. The local becomes the base and the
            // immediate becomes the displacement. A zero result means "no match".
            int displacement = GetConstOp(ref baseOp, type);

            if (displacement != 0)
            {
                return MemoryOp(type, baseOp, default, Multiplier.x1, displacement);
            }

            // Case 2: local + local, where one side may itself be a left shift by 0 or by
            // the scale of the access type. The shifted value then becomes the index and
            // the shift amount selects the multiplier.
            // An address that is directly a shift result is not handled.
            (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);

            // baseOp is still the original address when neither case matched.
            return baseOp == addr ? default : MemoryOp(type, baseOp, indexOp, scale, 0);
        }

        // Matches "local + constant" (either operand order). On success baseOp is replaced
        // by the local and the constant is returned; otherwise returns 0 and leaves baseOp alone.
        private static int GetConstOp(ref Operand baseOp, OperandType accessType)
        {
            Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);

            if (addOp == default)
            {
                return 0;
            }

            Operand lhs = addOp.GetSource(0);
            Operand rhs = addOp.GetSource(1);

            Operand constant;
            Operand other;

            switch (lhs.Kind, rhs.Kind)
            {
                case (OperandKind.Constant, OperandKind.LocalVariable):
                    constant = lhs;
                    other = rhs;
                    break;

                case (OperandKind.LocalVariable, OperandKind.Constant):
                    constant = rhs;
                    other = lhs;
                    break;

                default:
                    return 0;
            }

            // A constant that cannot be encoded on the instruction cannot be folded.
            if (ConstTooLong(constant, accessType))
            {
                return 0;
            }

            baseOp = other;

            return constant.AsInt32();
        }

        // Matches "local + local" and, optionally, a qualifying left shift on one side.
        // On success baseOp is replaced; the returned pair is the index operand and scale.
        private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
        {
            Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);

            if (addOp == default)
            {
                return (default(Operand), Multiplier.x1);
            }

            Operand lhs = addOp.GetSource(0);
            Operand rhs = addOp.GetSource(1);

            if (lhs.Kind != OperandKind.LocalVariable || rhs.Kind != OperandKind.LocalVariable)
            {
                return (default(Operand), Multiplier.x1);
            }

            // Unless a qualifying shift is found below: first operand is base, second is index.
            baseOp = lhs;

            Operand indexOp = rhs;
            Multiplier scale = Multiplier.x1;

            // Look for the shift on the first operand, then fall back to the second one.
            Operation shlOp = GetAsgOpWithInst(lhs, Instruction.ShiftLeft);

            bool shiftOnRhs = shlOp == default;

            if (shiftOnRhs)
            {
                shlOp = GetAsgOpWithInst(rhs, Instruction.ShiftLeft);
            }

            if (shlOp == default)
            {
                return (indexOp, scale);
            }

            Operand shifted = shlOp.GetSource(0);
            Operand amount = shlOp.GetSource(1);

            int maxShift = Assembler.GetScaleForType(accessType);

            bool foldable = shifted.Kind == OperandKind.LocalVariable &&
                            amount.Kind == OperandKind.Constant &&
                            (amount.Value == 0 || amount.Value == (ulong)maxShift);

            if (foldable)
            {
                switch (amount.Value)
                {
                    case 1:
                        scale = Multiplier.x2;
                        break;
                    case 2:
                        scale = Multiplier.x4;
                        break;
                    case 3:
                        scale = Multiplier.x8;
                        break;
                    case 4:
                        scale = Multiplier.x16;
                        break;
                    default:
                        scale = Multiplier.x1;
                        break;
                }

                // The operand that was NOT the shift result becomes the base.
                baseOp = shiftOnRhs ? lhs : rhs;
                indexOp = shifted;
            }

            return (indexOp, scale);
        }

        // Returns the single operation assigning op when it uses the given instruction,
        // otherwise default.
        private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
        {
            // With more than one assignment the value may depend on the control flow
            // path taken, so folding would not be safe.
            if (op.AssignmentsCount == 1)
            {
                Operation candidate = op.Assignments[0];

                if (candidate.Instruction == inst)
                {
                    return candidate;
                }
            }

            return default;
        }

        private static bool IsMemoryLoadOrStore(Instruction inst)
        {
            switch (inst)
            {
                case Instruction.Load:
                case Instruction.Store:
                    return true;

                default:
                    return false;
            }
        }

        // True when the constant does not fit in 32 bits, or when
        // CodeGenCommon.ConstFitsOnUImm12 rejects it for the given access type.
        private static bool ConstTooLong(Operand constOp, OperandType accessType)
        {
            ulong value = constOp.Value;

            return value > uint.MaxValue || !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
        }
    }
}
// diff --git a/ARMeilleure/CodeGen/Arm64/ArmCondition.cs b/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
// new file mode 100644
// index 00000000..db27a810
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
// @@ -0,0 +1,47 @@
using ARMeilleure.IntermediateRepresentation;
using System;

namespace ARMeilleure.CodeGen.Arm64
{
    // Condition codes. The numeric values are written directly into instruction bits
    // by the assembler, and Cset flips the lowest bit to invert a condition.
    enum ArmCondition
    {
        Eq = 0,
        Ne = 1,
        GeUn = 2,
        LtUn = 3,
        Mi = 4,
        Pl = 5,
        Vs = 6,
        Vc = 7,
        GtUn = 8,
        LeUn = 9,
        Ge = 10,
        Lt = 11,
        Gt = 12,
        Le = 13,
        Al = 14,
        Nv = 15
    }

    static class ComparisonArm64Extensions
    {
        // Maps an IR comparison onto the matching condition code.
        // Throws ArgumentException for comparisons without a mapping.
        public static ArmCondition ToArmCondition(this Comparison comp)
        {
            switch (comp)
            {
                case Comparison.Equal:
                    return ArmCondition.Eq;
                case Comparison.NotEqual:
                    return ArmCondition.Ne;
                case Comparison.Greater:
                    return ArmCondition.Gt;
                case Comparison.LessOrEqual:
                    return ArmCondition.Le;
                case Comparison.GreaterUI:
                    return ArmCondition.GtUn;
                case Comparison.LessOrEqualUI:
                    return ArmCondition.LeUn;
                case Comparison.GreaterOrEqual:
                    return ArmCondition.Ge;
                case Comparison.Less:
                    return ArmCondition.Lt;
                case Comparison.GreaterOrEqualUI:
                    return ArmCondition.GeUn;
                case Comparison.LessUI:
                    return ArmCondition.LtUn;
                default:
                    throw new ArgumentException(null, nameof(comp));
            }
        }
    }
}
// diff --git a/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs b/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
// new file mode 100644
// index 00000000..062a6d0b
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
// @@ -0,0 +1,14 @@
namespace ARMeilleure.CodeGen.Arm64
{
    // Register extension kinds. The numeric value is shifted into the instruction word
    // by the assembler, so the explicit values must not change.
    enum ArmExtensionType
    {
        Uxtb = 0,
        Uxth = 1,
        Uxtw = 2,
        Uxtx = 3,
        Sxtb = 4,
        Sxth = 5,
        Sxtw = 6,
        Sxtx = 7
    }
}
// diff --git a/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs b/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
// new file mode 100644
// index 00000000..d223a146
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
// @@ -0,0 +1,11 @@

namespace ARMeilleure.CodeGen.Arm64
{
    // Shift kinds for shifted-register operands. The numeric value is shifted into the
    // instruction word by the assembler, so the explicit values must not change.
    enum ArmShiftType
    {
        Lsl = 0,
        Lsr = 1,
        Asr = 2,
        Ror = 3
    }
}
// \ No newline at end of file
// diff --git a/ARMeilleure/CodeGen/Arm64/Assembler.cs b/ARMeilleure/CodeGen/Arm64/Assembler.cs
// new file mode 100644
// index 00000000..0ec0be7c
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/Assembler.cs
// @@ -0,0 +1,1160 @@
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Diagnostics;
using System.IO;
using static ARMeilleure.IntermediateRepresentation.Operand;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Encodes instructions as 32-bit words and writes them to the supplied stream.
    /// Each public method ORs register numbers and immediates into a base opcode
    /// and emits the result.
    /// </summary>
    class Assembler
    {
        // Bit 31 of the instruction word; the *Auto writers set it when the destination is I64.
        public const uint SfFlag = 1u << 31;

        // Both the stack pointer and the zero register are referred to by index 31.
        private const int SpRegister = 31;
        private const int ZrRegister = 31;

        private readonly Stream _stream;

        public Assembler(Stream stream)
        {
            _stream = stream;
        }

        // Add with an extended register operand.
        public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
        {
            WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount);
        }

        // Add with a shifted register, or with an immediate when rm is a non-zero constant
        // (or any constant when immForm is set).
        public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false)
        {
            WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm);
        }

        public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount);
        }

        public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount);
        }

        // Arithmetic shift right. Constant amounts are masked to the register width and
        // emitted through Sbfm; variable amounts use Asrv.
        public void Asr(Operand rd, Operand rn, Operand rm)
        {
            if (rm.Kind == OperandKind.Constant)
            {
                int shift = rm.AsInt32();
                int mask = rd.Type == OperandType.I64 ? 63 : 31;
                shift &= mask;
                Sbfm(rd, rn, shift, mask);
            }
            else
            {
                Asrv(rd, rn, rm);
            }
        }

        public void Asrv(Operand rd, Operand rn, Operand rm)
        {
            WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm);
        }

        // Unconditional branch; imm is encoded by EncodeSImm26_2.
        public void B(int imm)
        {
            WriteUInt32(0x14000000u | EncodeSImm26_2(imm));
        }

        // Conditional branch; imm is encoded by EncodeSImm19_2.
        public void B(ArmCondition condition, int imm)
        {
            WriteUInt32(0x54000000u | (uint)condition | (EncodeSImm19_2(imm) << 5));
        }

        public void Blr(Operand rn)
        {
            WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5));
        }

        public void Br(Operand rn)
        {
            WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5));
        }

        public void Brk()
        {
            WriteUInt32(0xd4200000u);
        }

        public void Cbz(Operand rt, int imm)
        {
            WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt);
        }

        public void Cbnz(Operand rt, int imm)
        {
            WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt);
        }

        public void Clrex(int crm = 15)
        {
            WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8));
        }

        public void Clz(Operand rd, Operand rn)
        {
            WriteInstructionAuto(0x5ac01000u, rd, rn);
        }

        public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true)
        {
            Debug.Assert((uint)size < 4);
            WriteSimdInstruction(0x2e208c00u | ((uint)size << 22), rd, rn, rm, q);
        }

        // Compare: emitted as Subs with the zero register as destination.
        public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            Subs(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
        }

        public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition)
        {
            WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm);
        }

        // Emitted as Csinc of the zero register with the lowest condition bit flipped.
        public void Cset(Operand rd, ArmCondition condition)
        {
            var zr = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
            Csinc(rd, zr, zr, (ArmCondition)((int)condition ^ 1));
        }

        public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition)
        {
            WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm);
        }

        public void Dmb(uint option)
        {
            WriteUInt32(0xd50330bfu | (option << 8));
        }

        public void DupScalar(Operand rd, Operand rn, int index, int size)
        {
            WriteInstruction(0x5e000400u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
        }

        public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount);
        }

        public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true)
        {
            WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q);
        }

        public void Extr(Operand rd, Operand rn, Operand rm, int imms)
        {
            uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
            WriteInstructionBitwiseAuto(0x13800000u | n | (EncodeUImm6(imms) << 10), rd, rn, rm);
        }

        public void FaddScalar(Operand rd, Operand rn, Operand rm)
        {
            WriteFPInstructionAuto(0x1e202800u, rd, rn, rm);
        }

        // Scalar FP conversion; the bit that gets set depends on whether rd is FP64.
        public void FcvtScalar(Operand rd, Operand rn)
        {
            uint instruction = 0x1e224000u | (rd.Type == OperandType.FP64 ? 1u << 15 : 1u << 22);
            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
        }

        public void FdivScalar(Operand rd, Operand rn, Operand rm)
        {
            WriteFPInstructionAuto(0x1e201800u, rd, rn, rm);
        }

        public void Fmov(Operand rd, Operand rn)
        {
            WriteFPInstructionAuto(0x1e204000u, rd, rn);
        }

        // Fmov between an integer and a non-integer register (exactly one side must be
        // an integer). topHalf is only allowed when one of the operands is I64.
        public void Fmov(Operand rd, Operand rn, bool topHalf)
        {
            Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger());
            Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf);

            uint opcode = rd.Type.IsInteger() ? 0b110u : 0b111u;

            uint rmode = topHalf ? 1u << 19 : 0u;
            uint ftype = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64 ? 1u << 22 : 0u;
            uint sf = rd.Type == OperandType.I64 || rn.Type == OperandType.I64 ? SfFlag : 0u;

            WriteUInt32(0x1e260000u | (opcode << 16) | rmode | ftype | sf | EncodeReg(rd) | (EncodeReg(rn) << 5));
        }

        public void FmulScalar(Operand rd, Operand rn, Operand rm)
        {
            WriteFPInstructionAuto(0x1e200800u, rd, rn, rm);
        }

        public void FnegScalar(Operand rd, Operand rn)
        {
            WriteFPInstructionAuto(0x1e214000u, rd, rn);
        }

        public void FsubScalar(Operand rd, Operand rn, Operand rm)
        {
            WriteFPInstructionAuto(0x1e203800u, rd, rn, rm);
        }

        public void Ins(Operand rd, Operand rn, int index, int size)
        {
            WriteInstruction(0x4e001c00u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
        }

        // Element-to-element insert; the source index is scaled by the element size.
        public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size)
        {
            uint imm4 = (uint)srcIndex << size;
            Debug.Assert((uint)srcIndex < (16u >> size));
            WriteInstruction(0x6e000400u | (imm4 << 11) | (EncodeIndexSizeImm5(dstIndex, size) << 16), rd, rn);
        }

        public void Ldaxp(Operand rt, Operand rt2, Operand rn)
        {
            WriteInstruction(0x887f8000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rt2);
        }

        public void Ldaxr(Operand rt, Operand rn)
        {
            WriteInstruction(0x085ffc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn);
        }

        public void Ldaxrb(Operand rt, Operand rn)
        {
            WriteInstruction(0x085ffc00u, rt, rn);
        }

        public void Ldaxrh(Operand rt, Operand rn)
        {
            WriteInstruction(0x085ffc00u | (1u << 30), rt, rn);
        }

        public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
        {
            uint instruction = GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type);
            WriteInstruction(instruction, rt, rn, rt2);
        }

        public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
        {
            uint instruction = GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type);
            WriteInstruction(instruction, rt, rn, rt2);
        }

        public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
        {
            uint instruction = GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type);
            WriteInstruction(instruction, rt, rn, rt2);
        }

        // Load from rn, which is either a memory operand (base + index, or base +
        // displacement) or a plain address operand (treated as displacement 0).
        public void Ldr(Operand rt, Operand rn)
        {
            if (rn.Kind == OperandKind.Memory)
            {
                MemoryOperand memOp = rn.GetMemory();

                if (memOp.Index != default)
                {
                    // The indexed form takes no displacement, and the index is either
                    // unscaled or scaled by the access size.
                    Debug.Assert(memOp.Displacement == 0);
                    Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
                    LdrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
                }
                else
                {
                    LdrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
                }
            }
            else
            {
                LdrRiUn(rt, rn, 0);
            }
        }

        public void LdrLit(Operand rt, int offset)
        {
            uint instruction = 0x18000000u | (EncodeSImm19_2(offset) << 5);

            if (rt.Type == OperandType.I64)
            {
                instruction |= 1u << 30;
            }

            WriteInstruction(instruction, rt);
        }

        public void LdrRiPost(Operand rt, Operand rn, int imm)
        {
            uint instruction = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type) | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void LdrRiPre(Operand rt, Operand rn, int imm)
        {
            uint instruction = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type) | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void LdrRiUn(Operand rt, Operand rn, int imm)
        {
            uint instruction = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
            WriteInstruction(instruction, rt, rn);
        }

        // Load with a register offset.
        public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
        {
            // The vector base opcode must leave bit 23 clear, like the other Ldr* helpers:
            // GetLdrStrInstruction only ORs bits in (bit 23 for V128, the size bits for
            // FP32/FP64). A base with bit 23 already set would combine with the FP32/FP64
            // size bits into an unallocated encoding.
            uint instruction = GetLdrStrInstruction(0xb8600800u, 0x3c600800u, rt.Type);
            WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
        }

        public void LdrbRiPost(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x38400400u | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void LdrbRiPre(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x38400c00u | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void LdrbRiUn(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x39400000u | (EncodeUImm12(imm, 0) << 10);
            WriteInstruction(instruction, rt, rn);
        }

        public void LdrhRiPost(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x78400400u | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void LdrhRiPre(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x78400c00u | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void LdrhRiUn(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x79400000u | (EncodeUImm12(imm, 1) << 10);
            WriteInstruction(instruction, rt, rn);
        }

        public void Ldur(Operand rt, Operand rn, int imm)
        {
            uint instruction = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type) | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        // Logical shift left. Constant amounts are masked to the register width and
        // emitted through Ubfm; variable amounts use Lslv.
        public void Lsl(Operand rd, Operand rn, Operand rm)
        {
            if (rm.Kind == OperandKind.Constant)
            {
                int shift = rm.AsInt32();
                int mask = rd.Type == OperandType.I64 ? 63 : 31;
                shift &= mask;
                Ubfm(rd, rn, -shift & mask, mask - shift);
            }
            else
            {
                Lslv(rd, rn, rm);
            }
        }

        public void Lslv(Operand rd, Operand rn, Operand rm)
        {
            WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm);
        }

        // Logical shift right. Constant amounts are masked to the register width and
        // emitted through Ubfm; variable amounts use Lsrv.
        public void Lsr(Operand rd, Operand rn, Operand rm)
        {
            if (rm.Kind == OperandKind.Constant)
            {
                int shift = rm.AsInt32();
                int mask = rd.Type == OperandType.I64 ? 63 : 31;
                shift &= mask;
                Ubfm(rd, rn, shift, mask);
            }
            else
            {
                Lsrv(rd, rn, rm);
            }
        }

        public void Lsrv(Operand rd, Operand rn, Operand rm)
        {
            WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm);
        }

        public void Madd(Operand rd, Operand rn, Operand rm, Operand ra)
        {
            WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra);
        }

        // Emitted as Madd with the zero register as the addend.
        public void Mul(Operand rd, Operand rn, Operand rm)
        {
            Madd(rd, rn, rm, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type));
        }

        // Register move: Orr with the zero register for integers, OrrVector of rn with
        // itself otherwise.
        public void Mov(Operand rd, Operand rn)
        {
            if (rd.Type.IsInteger())
            {
                Orr(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn);
            }
            else
            {
                OrrVector(rd, rn, rn);
            }
        }

        // Move that may involve register index 31 as the stack pointer. In that case it is
        // emitted as an Add of immediate 0 rather than through Mov, which uses index 31 as
        // the zero register.
        public void MovSp(Operand rd, Operand rn)
        {
            if (rd.GetRegister().Index == SpRegister ||
                rn.GetRegister().Index == SpRegister)
            {
                Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true);
            }
            else
            {
                Mov(rd, rn);
            }
        }

        public void Mov(Operand rd, int imm)
        {
            Movz(rd, imm, 0);
        }

        // hw selects the 16-bit slot: 0-3 for I64 destinations, 0-1 otherwise.
        public void Movz(Operand rd, int imm, int hw)
        {
            Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
            WriteInstructionAuto(0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
        }

        // hw selects the 16-bit slot: 0-3 for I64 destinations, 0-1 otherwise.
        public void Movk(Operand rd, int imm, int hw)
        {
            Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
            WriteInstructionAuto(0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
        }

        // Each field is masked to its width before being placed in the instruction word.
        public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2)
        {
            uint instruction = 0xd5300000u;

            instruction |= (op2 & 7) << 5;
            instruction |= (crm & 15) << 8;
            instruction |= (crn & 15) << 12;
            instruction |= (op1 & 7) << 16;
            instruction |= (o0 & 1) << 19;

            WriteInstruction(instruction, rt);
        }

        // Emitted as Orn with the zero register as first operand.
        public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            Orn(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
        }

        // Emitted as Sub from the zero register.
        public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            Sub(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
        }

        public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount);
        }

        public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount);
        }

        public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true)
        {
            WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q);
        }

        public void Ret(Operand rn)
        {
            WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5));
        }

        public void Rev(Operand rd, Operand rn)
        {
            uint opc0 = rd.Type == OperandType.I64 ? 1u << 10 : 0u;
            WriteInstructionAuto(0x5ac00800u | opc0, rd, rn);
        }

        // Rotate right. Constant amounts are masked to the register width and emitted
        // through Extr with rn as both sources; variable amounts use Rorv.
        public void Ror(Operand rd, Operand rn, Operand rm)
        {
            if (rm.Kind == OperandKind.Constant)
            {
                int shift = rm.AsInt32();
                int mask = rd.Type == OperandType.I64 ? 63 : 31;
                shift &= mask;
                Extr(rd, rn, rn, shift);
            }
            else
            {
                Rorv(rd, rn, rm);
            }
        }

        public void Rorv(Operand rd, Operand rn, Operand rm)
        {
            WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm);
        }

        public void Sbfm(Operand rd, Operand rn, int immr, int imms)
        {
            uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
            WriteInstructionAuto(0x13000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
        }

        public void ScvtfScalar(Operand rd, Operand rn)
        {
            uint instruction = 0x1e220000u;

            // The flag depends on the integer source here; the FP destination type is
            // handled by WriteFPInstructionAuto.
            if (rn.Type == OperandType.I64)
            {
                instruction |= SfFlag;
            }

            WriteFPInstructionAuto(instruction, rd, rn);
        }

        public void Sdiv(Operand rd, Operand rn, Operand rm)
        {
            WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm);
        }

        public void Smulh(Operand rd, Operand rn, Operand rm)
        {
            WriteInstructionRm16(0x9b407c00u, rd, rn, rm);
        }

        public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs)
        {
            WriteInstruction(0x88208000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs, rt2);
        }

        public void Stlxr(Operand rt, Operand rn, Operand rs)
        {
            WriteInstructionRm16(0x0800fc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs);
        }

        public void Stlxrb(Operand rt, Operand rn, Operand rs)
        {
            WriteInstructionRm16(0x0800fc00u, rt, rn, rs);
        }

        public void Stlxrh(Operand rt, Operand rn, Operand rs)
        {
            WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs);
        }

        public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
        {
            uint instruction = GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type);
            WriteInstruction(instruction, rt, rn, rt2);
        }

        public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
        {
            uint instruction = GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type);
            WriteInstruction(instruction, rt, rn, rt2);
        }

        public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
        {
            uint instruction = GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type);
            WriteInstruction(instruction, rt, rn, rt2);
        }

        // Store to rn, which is either a memory operand (base + index, or base +
        // displacement) or a plain address operand (treated as displacement 0).
        public void Str(Operand rt, Operand rn)
        {
            if (rn.Kind == OperandKind.Memory)
            {
                MemoryOperand memOp = rn.GetMemory();

                if (memOp.Index != default)
                {
                    // The indexed form takes no displacement, and the index is either
                    // unscaled or scaled by the access size.
                    Debug.Assert(memOp.Displacement == 0);
                    Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
                    StrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
                }
                else
                {
                    StrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
                }
            }
            else
            {
                StrRiUn(rt, rn, 0);
            }
        }

        public void StrRiPost(Operand rt, Operand rn, int imm)
        {
            uint instruction = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type) | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void StrRiPre(Operand rt, Operand rn, int imm)
        {
            uint instruction = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type) | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void StrRiUn(Operand rt, Operand rn, int imm)
        {
            uint instruction = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
            WriteInstruction(instruction, rt, rn);
        }

        // Store with a register offset.
        public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
        {
            // The vector base opcode must leave bit 23 clear, like the other Str* helpers:
            // GetLdrStrInstruction only ORs bits in (bit 23 for V128, the size bits for
            // FP32/FP64). A base with bit 23 already set would combine with the FP32/FP64
            // size bits into an unallocated encoding.
            uint instruction = GetLdrStrInstruction(0xb8200800u, 0x3c200800u, rt.Type);
            WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
        }

        public void StrbRiPost(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x38000400u | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void StrbRiPre(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x38000c00u | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void StrbRiUn(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x39000000u | (EncodeUImm12(imm, 0) << 10);
            WriteInstruction(instruction, rt, rn);
        }

        public void StrhRiPost(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x78000400u | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void StrhRiPre(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x78000c00u | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        public void StrhRiUn(Operand rt, Operand rn, int imm)
        {
            uint instruction = 0x79000000u | (EncodeUImm12(imm, 1) << 10);
            WriteInstruction(instruction, rt, rn);
        }

        public void Stur(Operand rt, Operand rn, int imm)
        {
            uint instruction = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type) | (EncodeSImm9(imm) << 12);
            WriteInstruction(instruction, rt, rn);
        }

        // Subtract with an extended register operand.
        public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
        {
            WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount);
        }

        // Subtract with a shifted register, or with an immediate when rm is a non-zero constant.
        public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount);
        }

        public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount);
        }

        public void Sxtb(Operand rd, Operand rn)
        {
            Sbfm(rd, rn, 0, 7);
        }

        public void Sxth(Operand rd, Operand rn)
        {
            Sbfm(rd, rn, 0, 15);
        }

        public void Sxtw(Operand rd, Operand rn)
        {
            Sbfm(rd, rn, 0, 31);
        }

        // Emitted as Ands with the zero register as destination.
        public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
        {
            Ands(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
        }

        public void Ubfm(Operand rd, Operand rn, int immr, int imms)
        {
            uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
            WriteInstructionAuto(0x53000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
        }

        public void UcvtfScalar(Operand rd, Operand rn)
        {
            uint instruction = 0x1e230000u;

            // The flag depends on the integer source here; the FP destination type is
            // handled by WriteFPInstructionAuto.
            if (rn.Type == OperandType.I64)
            {
                instruction |= SfFlag;
            }

            WriteFPInstructionAuto(instruction, rd, rn);
        }

        public void Udiv(Operand rd, Operand rn, Operand rm)
        {
            WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm);
        }

        public void Umov(Operand rd, Operand rn, int index, int size)
        {
            uint q = size == 3 ? 1u << 30 : 0u;
            WriteInstruction(0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16) | q, rd, rn);
        }

        public void Umulh(Operand rd, Operand rn, Operand rm)
        {
            WriteInstructionRm16(0x9bc07c00u, rd, rn, rm);
        }

        public void Uxtb(Operand rd, Operand rn)
        {
            Ubfm(rd, rn, 0, 7);
        }

        public void Uxth(Operand rd, Operand rn)
        {
            Ubfm(rd, rn, 0, 15);
        }

        // Picks between the immediate form (instI) and the shifted-register form (instR).
        // The immediate form is used for non-zero constants, or for any constant when
        // immForm is set.
        private void WriteInstructionAuto(
            uint instI,
            uint instR,
            Operand rd,
            Operand rn,
            Operand rm,
            ArmShiftType shiftType = ArmShiftType.Lsl,
            int shiftAmount = 0,
            bool immForm = false)
        {
            if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm))
            {
                Debug.Assert(shiftAmount == 0);
                int imm = rm.AsInt32();
                Debug.Assert((uint)imm == rm.Value);
                // A non-zero immediate whose low 12 bits are clear is encoded shifted
                // right by 12, with the sh flag set.
                if (imm != 0 && (imm & 0xfff) == 0)
                {
                    instI |= 1 << 22; // sh flag
                    imm >>= 12;
                }
                WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn);
            }
            else
            {
                instR |= EncodeUImm6(shiftAmount) << 10;
                instR |= (uint)shiftType << 22;

                WriteInstructionRm16Auto(instR, rd, rn, rm);
            }
        }

        // Extended-register form: the shift amount (at most 4) and the extension type
        // are placed in the instruction word.
        private void WriteInstructionAuto(
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm,
            ArmExtensionType extensionType,
            int shiftAmount = 0)
        {
            Debug.Assert((uint)shiftAmount <= 4);

            instruction |= (uint)shiftAmount << 10;
            instruction |= (uint)extensionType << 13;

            WriteInstructionRm16Auto(instruction, rd, rn, rm);
        }

        // Picks between the bitmask-immediate form (instI) and the shifted-register form
        // (instR). Non-zero constants must be encodable by CodeGenCommon.TryEncodeBitMask.
        private void WriteInstructionBitwiseAuto(
            uint instI,
            uint instR,
            Operand rd,
            Operand rn,
            Operand rm,
            ArmShiftType shiftType = ArmShiftType.Lsl,
            int shiftAmount = 0)
        {
            if (rm.Kind == OperandKind.Constant && rm.Value != 0)
            {
                Debug.Assert(shiftAmount == 0);
                bool canEncode = CodeGenCommon.TryEncodeBitMask(rm, out int immN, out int immS, out int immR);
                Debug.Assert(canEncode);
                uint instruction = instI | ((uint)immS << 10) | ((uint)immR << 16) | ((uint)immN << 22);

                WriteInstructionAuto(instruction, rd, rn);
            }
            else
            {
                WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount);
            }
        }

        private void WriteInstructionBitwiseAuto(
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm,
            ArmShiftType shiftType = ArmShiftType.Lsl,
            int shiftAmount = 0)
        {
            if (rd.Type == OperandType.I64)
            {
                instruction |= SfFlag;
            }

            instruction |= EncodeUImm6(shiftAmount) << 10;
            instruction |= (uint)shiftType << 22;

            WriteInstructionRm16(instruction, rd, rn, rm);
        }

        // Register-offset load/store: sets the scale bit when shift is requested and
        // places the extension type in the instruction word.
        private void WriteInstructionLdrStrAuto(
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm,
            ArmExtensionType extensionType,
            bool shift)
        {
            if (shift)
            {
                instruction |= 1u << 12;
            }

            instruction |= (uint)extensionType << 13;

            if (rd.Type == OperandType.I64)
            {
                instruction |= 1u << 30;
            }

            WriteInstructionRm16(instruction, rd, rn, rm);
        }

        private void WriteInstructionAuto(uint instruction, Operand rd)
        {
            if (rd.Type == OperandType.I64)
            {
                instruction |= SfFlag;
            }

            WriteInstruction(instruction, rd);
        }

        public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn)
        {
            if (rd.Type == OperandType.I64)
            {
                instruction |= SfFlag;
            }

            WriteInstruction(instruction, rd, rn);
        }

        private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
        {
            if (rd.Type == OperandType.I64)
            {
                instruction |= SfFlag;
            }

            WriteInstruction(instruction, rd, rn, rm, ra);
        }

        // Raw writers: rd goes in bits 0-4 and rn in bits 5-9.
        public void WriteInstruction(uint instruction, Operand rd)
        {
            WriteUInt32(instruction | EncodeReg(rd));
        }

        public void WriteInstruction(uint instruction, Operand rd, Operand rn)
        {
            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
        }

        // Three-register form where the third register is placed at bit 10.
        public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm)
        {
            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 10));
        }

        // Four-register form: ra is placed at bit 10 and rm at bit 16.
        public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
        {
            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(ra) << 10) | (EncodeReg(rm) << 16));
        }

        private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn)
        {
            if (rd.Type == OperandType.FP64)
            {
                instruction |= 1u << 22;
            }

            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
        }

        private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm)
        {
            if (rd.Type == OperandType.FP64)
            {
                instruction |= 1u << 22;
            }

            WriteInstructionRm16(instruction, rd, rn, rm);
        }

        private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true)
        {
            if (q)
            {
                instruction |= 1u << 30;
            }

            WriteInstructionRm16(instruction, rd, rn, rm);
        }

        private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm)
        {
            if (rd.Type == OperandType.I64)
            {
                instruction |= SfFlag;
            }

            WriteInstructionRm16(instruction, rd, rn, rm);
        }

        // Three-register form where the third register is placed at bit 16.
        public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm)
        {
            WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
        }

        public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm)
        {
            WriteUInt32(instruction | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
        }

        // Selects the integer or vector pair opcode for the operand type and ORs in the
        // immediate, scaled according to the operand size.
        private static uint GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type)
        {
            uint instruction;
            int scale;

            if (type.IsInteger())
            {
                instruction = intInst;

                if (type == OperandType.I64)
                {
                    instruction |= SfFlag;
                    scale = 3;
                }
                else
                {
                    scale = 2;
                }
            }
            else
            {
                // Any non-integer type other than FP32/FP64 uses opc 2.
                int opc = type switch
                {
                    OperandType.FP32 => 0,
                    OperandType.FP64 => 1,
                    _ => 2
                };

                instruction = vecInst | ((uint)opc << 30);
                scale = 2 + opc;
            }

            instruction |= (EncodeSImm7(imm, scale) << 15);

            return instruction;
        }

        // Selects the integer or vector opcode for the operand type and ORs in the size
        // bits. Bits are only ever added, so vecInst must be passed with bit 23 and
        // bits 30-31 clear.
        private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type)
        {
            uint instruction;

            if (type.IsInteger())
            {
                instruction = intInst;

                if (type == OperandType.I64)
                {
                    instruction |= 1 << 30;
                }
            }
            else
            {
                instruction = vecInst;

                if (type == OperandType.V128)
                {
                    instruction |= 1u << 23;
                }
                else
                {
                    instruction |= type == OperandType.FP32 ? 2u << 30 : 3u << 30;
                }
            }

            return instruction;
        }

        // Combines an element index and a size (0-3) into a single field: a marker bit at
        // position size, with the index in the bits above it.
        private static uint EncodeIndexSizeImm5(int index, int size)
        {
            Debug.Assert((uint)size < 4);
            Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination.");
            return ((uint)index << (size + 1)) | (1u << size);
        }

        // The Encode* helpers below truncate the value to the field width and assert
        // (debug builds only) that the original value can be recovered from the field.
        private static uint EncodeSImm7(int value, int scale)
        {
            uint imm = (uint)(value >> scale) & 0x7f;
            Debug.Assert(((int)imm << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
            return imm;
        }

        private static uint EncodeSImm9(int value)
        {
            uint imm = (uint)value & 0x1ff;
            Debug.Assert(((int)imm << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}.");
            return imm;
        }

        private static uint EncodeSImm19_2(int value)
        {
            uint imm = (uint)(value >> 2) & 0x7ffff;
            Debug.Assert(((int)imm << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}.");
            return imm;
        }

        private static uint EncodeSImm26_2(int value)
        {
            uint imm = (uint)(value >> 2) & 0x3ffffff;
            Debug.Assert(((int)imm << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}.");
            return imm;
        }

        private static uint EncodeUImm4(int value)
        {
            uint imm = (uint)value & 0xf;
            Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
            return imm;
        }

        private static uint EncodeUImm6(int value)
        {
            uint imm = (uint)value & 0x3f;
            Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
            return imm;
        }

        // Convenience overload: derives the scale from the operand type.
        private static uint EncodeUImm12(int value, OperandType type)
        {
            return EncodeUImm12(value, GetScaleForType(type));
        }

// Tail of the Assembler class (its header is outside this chunk), followed by CallingConvention.cs.
        // Unsigned 12-bit immediate, pre-divided by 2^scale. Asserts that the value round-trips.
        private static uint EncodeUImm12(int value, int scale)
        {
            uint imm = (uint)(value >> scale) & 0xfff;
            Debug.Assert((int)imm << scale == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
            return imm;
        }

        // Unsigned 16-bit immediate. Asserts that the value fits.
        private static uint EncodeUImm16(int value)
        {
            uint imm = (uint)value & 0xffff;
            Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
            return imm;
        }

        // Returns the 5-bit register field for an operand. A constant zero operand is
        // encoded as ZrRegister; anything else is expected to be a register operand.
        private static uint EncodeReg(Operand reg)
        {
            if (reg.Kind == OperandKind.Constant && reg.Value == 0)
            {
                return ZrRegister;
            }

            // Check the operand kind before asking for its register, so that a wrong
            // operand kind is reported by this assert rather than after the value was used.
            Debug.Assert(reg.Kind == OperandKind.Register);

            uint regIndex = (uint)reg.GetRegister().Index;
            Debug.Assert(regIndex < 32);
            return regIndex;
        }

        // Returns log2 of the access size in bytes for the given operand type.
        // Throws ArgumentException for any type not listed.
        public static int GetScaleForType(OperandType type)
        {
            return type switch
            {
                OperandType.I32 => 2,
                OperandType.I64 => 3,
                OperandType.FP32 => 2,
                OperandType.FP64 => 3,
                OperandType.V128 => 4,
                _ => throw new ArgumentException($"Invalid type {type}.")
            };
        }

        private void WriteInt16(short value)
        {
            WriteUInt16((ushort)value);
        }

        private void WriteInt32(int value)
        {
            WriteUInt32((uint)value);
        }

        private void WriteByte(byte value)
        {
            _stream.WriteByte(value);
        }

        // Writes the value to the stream, least significant byte first.
        private void WriteUInt16(ushort value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
        }

        // Writes the value to the stream, least significant byte first.
        private void WriteUInt32(uint value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
            _stream.WriteByte((byte)(value >> 16));
            _stream.WriteByte((byte)(value >> 24));
        }
    }
}

// diff --git a/ARMeilleure/CodeGen/Arm64/CallingConvention.cs b/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
// new file mode 100644
// index 00000000..fda8d786
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
// @@ -0,0 +1,96 @@
using System;

namespace ARMeilleure.CodeGen.Arm64
{
    // Register masks and argument/return register indices used by the Arm64 code generator.
    // All masks are bit sets where bit N stands for register N.
    static class CallingConvention
    {
        private const int RegistersMask = unchecked((int)0xffffffff);

        // Some of those registers have specific roles and can't be used as general purpose registers.
        // X18 - Reserved for platform specific usage.
        // X29 - Frame pointer.
        // X30 - Return address.
        // X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
        private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);

        // Every integer register except the reserved ones.
        public static int GetIntAvailableRegisters()
        {
            return RegistersMask & ~ReservedRegsMask;
        }

        // Every vector register.
        public static int GetVecAvailableRegisters()
        {
            return RegistersMask;
        }

        // Integer registers that are neither callee saved nor reserved.
        public static int GetIntCallerSavedRegisters()
        {
            return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
        }

        // Complement of the FP callee saved mask.
        public static int GetFpCallerSavedRegisters()
        {
            return GetFpCalleeSavedRegisters() ^ RegistersMask;
        }

        // Complement of the vector callee saved mask.
        public static int GetVecCallerSavedRegisters()
        {
            return GetVecCalleeSavedRegisters() ^ RegistersMask;
        }

        public static int GetIntCalleeSavedRegisters()
        {
            return 0x1ff80000; // X19 to X28
        }

        public static int GetFpCalleeSavedRegisters()
        {
            return 0xff00; // D8 to D15
        }

        // No vector register is treated as callee saved.
        public static int GetVecCalleeSavedRegisters()
        {
            return 0;
        }

        // Number of arguments passed on registers, for both integer and vector arguments.
        public static int GetArgumentsOnRegsCount()
        {
            return 8;
        }

        // Maps an argument index to its integer register index (identity mapping).
        // Throws ArgumentOutOfRangeException when the index is negative or not below GetArgumentsOnRegsCount().
        public static int GetIntArgumentRegister(int index)
        {
            if ((uint)index < (uint)GetArgumentsOnRegsCount())
            {
                return index;
            }

            throw new ArgumentOutOfRangeException(nameof(index));
        }

        // Maps an argument index to its vector register index (identity mapping).
        // Throws ArgumentOutOfRangeException when the index is negative or not below GetArgumentsOnRegsCount().
        public static int GetVecArgumentRegister(int index)
        {
            if ((uint)index < (uint)GetArgumentsOnRegsCount())
            {
                return index;
            }

            throw new ArgumentOutOfRangeException(nameof(index));
        }

        public static int GetIntReturnRegister()
        {
            return 0;
        }

        public static int GetIntReturnRegisterHigh()
        {
            return 1;
        }

        public static int GetVecReturnRegister()
        {
            return 0;
        }
    }
}
// \ No newline at end of file
// diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
// new file mode 100644
// index 00000000..e67d2fdb
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
// @@ -0,0 +1,173 @@
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Numerics;

namespace ARMeilleure.CodeGen.Arm64
{
    // Helpers shared by the Arm64 backend to test whether constants fit instruction immediates.
    static class CodeGenCommon
    {
        public const int TcAddressRegister = 8;
        public const int ReservedRegister = 17;

        // True when "value" is a multiple of 2^scale whose quotient fits a signed 7-bit field.
        public static bool ConstFitsOnSImm7(int value, int scale)
        {
            return (((value >> scale) << 25) >> (25 - scale)) == value;
        }

        // True when "value" fits a signed 9-bit field.
        public static bool ConstFitsOnSImm9(int value)
        {
            return ((value << 23) >> 23) == value;
        }

        // True when "value" fits an unsigned 12-bit field.
        public static bool ConstFitsOnUImm12(int value)
        {
            return (value & 0xfff) == value;
        }

        // True when "value" is a multiple of the access size of "type" whose quotient fits
        // an unsigned 12-bit field.
        public static bool ConstFitsOnUImm12(int value, OperandType type)
        {
            int scale = Assembler.GetScaleForType(type);
            return (((value >> scale) & 0xfff) << scale) == value;
        }

        // Operand overload: I32 values are replicated into the upper 32 bits before encoding.
        public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
        {
            ulong value = operand.Value;

            if (operand.Type == OperandType.I32)
            {
                value |= value << 32;
            }

            return TryEncodeBitMask(value, out immN, out immS, out immR);
        }

        // Tries to express "value" as a repeated, rotated run of set bits, described by
        // immN/immS/immR. Returns false (with all outputs zeroed) when that is not possible.
        public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
        {
            // Some special values also can't be encoded:
            // 0 can't be encoded because we need to subtract 1 from onesCount (which would become negative if 0).
            // A value with all bits set can't be encoded because it is reserved according to the spec, because:
            // Any value AND all ones will be equal itself, so it's effectively a no-op.
            // Any value OR all ones will be equal all ones, so one can just use MOV.
            // Any value XOR all ones will be equal its inverse, so one can just use MVN.
            //
            // Zero is rejected here explicitly. Without this, CountSequence(0) is 64 and the
            // "value >> bitLength" below would rely on C# masking the shift count to 6 bits.
            if (value == 0 || value == ulong.MaxValue)
            {
                immN = 0;
                immS = 0;
                immR = 0;

                return false;
            }

            // Length of the first two bit runs (zeros then ones, or ones then zeros) from the LSB.
            int bitLength = CountSequence(value);

            if ((value >> bitLength) != 0)
            {
                bitLength += CountSequence(value >> bitLength);
            }

            // Round the run length up to a power of two, which is the smallest candidate element size.
            int bitLengthLog2 = BitOperations.Log2((uint)bitLength);
            int bitLengthPow2 = 1 << bitLengthLog2;

            if (bitLengthPow2 < bitLength)
            {
                bitLengthLog2++;
                bitLengthPow2 <<= 1;
            }

            int selectedESize = 64;
            int repetitions = 1;
            int onesCount = BitOperations.PopCount(value);

            if (bitLengthPow2 < 64 && (value >> bitLengthPow2) != 0)
            {
                // Look for the smallest element size whose pattern repeats across all 64 bits.
                for (int eSizeLog2 = bitLengthLog2; eSizeLog2 < 6; eSizeLog2++)
                {
                    bool match = true;
                    int eSize = 1 << eSizeLog2;
                    ulong mask = (1UL << eSize) - 1;
                    ulong eValue = value & mask;

                    for (int e = 1; e < 64 / eSize; e++)
                    {
                        if (((value >> (e * eSize)) & mask) != eValue)
                        {
                            match = false;
                            break;
                        }
                    }

                    if (match)
                    {
                        selectedESize = eSize;
                        repetitions = 64 / eSize;
                        onesCount = BitOperations.PopCount(eValue);
                        break;
                    }
                }
            }

            // Find rotation. We have two cases, one where the highest bit is 0
            // and one where it is 1.
            // If it's 1, we just need to count the number of 1 bits on the MSB to find the right rotation.
            // If it's 0, we just need to count the number of 0 bits on the LSB to find the left rotation,
            // then we can convert it to the right rotation shift by subtracting the value from the element size.
            int rotation;
            long vHigh = (long)(value << (64 - selectedESize));
            if (vHigh < 0)
            {
                rotation = BitOperations.LeadingZeroCount(~(ulong)vHigh);
            }
            else
            {
                rotation = (selectedESize - BitOperations.TrailingZeroCount(value)) & (selectedESize - 1);
            }

            // Reconstruct value and see if it matches. If not, we can't encode.
            ulong reconstructed = onesCount == 64 ? ulong.MaxValue : RotateRight((1UL << onesCount) - 1, rotation, selectedESize);

            for (int bit = 32; bit >= selectedESize; bit >>= 1)
            {
                reconstructed |= reconstructed << bit;
            }

            if (reconstructed != value || onesCount == 0)
            {
                immN = 0;
                immS = 0;
                immR = 0;

                return false;
            }

            immR = rotation;

            // immN indicates that there are no repetitions.
            // The MSB of immS indicates the amount of repetitions, and the LSB the number of bits set.
            if (repetitions == 1)
            {
                immN = 1;
                immS = 0;
            }
            else
            {
                immN = 0;
                immS = (0xf80 >> BitOperations.Log2((uint)repetitions)) & 0x3f;
            }

            immS |= onesCount - 1;

            return true;
        }

        // Length of the run of identical bits starting at the LSB. Only one of the two counts
        // is non-zero for any value other than 0 or all ones.
        private static int CountSequence(ulong value)
        {
            return BitOperations.TrailingZeroCount(value) + BitOperations.TrailingZeroCount(~value);
        }

        // Rotates the low "size" bits of "bits" right by "shift".
        private static ulong RotateRight(ulong bits, int shift, int size)
        {
            return (bits >> shift) | ((bits << (size - shift)) & (size == 64 ? ulong.MaxValue : (1UL << size) - 1));
        }
    }
}
// \ No newline at end of file
// diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs b/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
// new file mode 100644
// index 00000000..1ddde0c1
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
// @@ -0,0 +1,286 @@
using ARMeilleure.CodeGen.Linking;
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.IO;

namespace ARMeilleure.CodeGen.Arm64
{
    // Holds the output stream and bookkeeping for one function being emitted: block offsets,
    // branches that still need patching, and a pool of constants appended after the code.
    class CodeGenContext
    {
        private const int BccInstLength = 4;
        private const int CbnzInstLength = 4;
        private const int LdrLitInstLength = 4;

        // Only assigned in the constructor.
        private readonly Stream _stream;

        public int StreamOffset => (int)_stream.Length;

        public AllocationResult AllocResult { get; }

        public Assembler Assembler { get; }

        public BasicBlock CurrBlock { get; private set; }

        public bool HasCall { get; }

        public int CallArgsRegionSize { get; }
        public int FpLrSaveRegionSize { get; }

        // Stream position at which each already emitted block starts.
        private readonly Dictionary<BasicBlock, long> _visitedBlocks;

        // Branches emitted before their target block; patched when the block is entered.
        private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;

        // One constant pool slot. LdrOffsets is a reference type, so copies of this struct
        // returned by the dictionary still share (and can append to) the same list.
        private struct ConstantPoolEntry
        {
            public readonly int Offset;
            public readonly Symbol Symbol;
            public readonly List<(Operand, int)> LdrOffsets;

            public ConstantPoolEntry(int offset, Symbol symbol)
            {
                Offset = offset;
                Symbol = symbol;
                LdrOffsets = new List<(Operand, int)>();
            }
        }

        private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;

        private bool _constantPoolWritten;
        private long _constantPoolOffset;

        private ArmCondition _jNearCondition;
        private Operand _jNearValue;

        private long _jNearPosition;

        private readonly bool _relocatable;

        // A negative maxCallArgs means the function makes no calls; in that case no call
        // argument region and no FP/LR save region is reserved.
        public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
        {
            _stream = new MemoryStream();

            AllocResult = allocResult;

            Assembler = new Assembler(_stream);

            bool hasCall = maxCallArgs >= 0;

            HasCall = hasCall;

            if (maxCallArgs < 0)
            {
                maxCallArgs = 0;
            }

            CallArgsRegionSize = maxCallArgs * 16;
            FpLrSaveRegionSize = hasCall ? 16 : 0;

            _visitedBlocks = new Dictionary<BasicBlock, long>();
            _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
            _constantPool = new Dictionary<ulong, ConstantPoolEntry>();

            _relocatable = relocatable;
        }

        // Marks the start of a block at the current stream position and patches every
        // branch that was emitted towards it before its position was known.
        public void EnterBlock(BasicBlock block)
        {
            CurrBlock = block;

            long target = _stream.Position;

            if (_pendingBranches.TryGetValue(block, out var list))
            {
                foreach (var tuple in list)
                {
                    _stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
                    WriteBranch(tuple.Condition, target);
                }

                // Return to where the block's own code will be written.
                _stream.Seek(target, SeekOrigin.Begin);
                _pendingBranches.Remove(block);
            }

            _visitedBlocks.Add(block, target);
        }

        public void JumpTo(BasicBlock target)
        {
            JumpTo(ArmCondition.Al, target);
        }

        // Emits a branch to the target block. If the target has not been emitted yet, space
        // for the branch is skipped and the branch is recorded for patching in EnterBlock.
        public void JumpTo(ArmCondition condition, BasicBlock target)
        {
            if (_visitedBlocks.TryGetValue(target, out long offset))
            {
                WriteBranch(condition, offset);
            }
            else
            {
                if (!_pendingBranches.TryGetValue(target, out var list))
                {
                    list = new List<(ArmCondition, long)>();
                    _pendingBranches.Add(target, list);
                }

                list.Add((condition, _stream.Position));

                _stream.Seek(BccInstLength, SeekOrigin.Current);
            }
        }

        // Writes a branch at the current position; the offset is relative to that position.
        private void WriteBranch(ArmCondition condition, long to)
        {
            int imm = checked((int)(to - _stream.Position));

            if (condition != ArmCondition.Al)
            {
                Assembler.B(condition, imm);
            }
            else
            {
                Assembler.B(imm);
            }
        }

        // Reserves space for a conditional branch whose target is set by the next JumpHere call.
        public void JumpToNear(ArmCondition condition)
        {
            _jNearCondition = condition;
            _jNearPosition = _stream.Position;

            _stream.Seek(BccInstLength, SeekOrigin.Current);
        }

        // Reserves space for a CBNZ on "value" whose target is set by the next JumpHere call.
        public void JumpToNearIfNotZero(Operand value)
        {
            _jNearValue = value;
            _jNearPosition = _stream.Position;

            _stream.Seek(CbnzInstLength, SeekOrigin.Current);
        }

        // Patches the branch reserved by JumpToNear/JumpToNearIfNotZero so it lands on the
        // current position, then restores the stream position.
        public void JumpHere()
        {
            long currentPosition = _stream.Position;
            long offset = currentPosition - _jNearPosition;

            _stream.Seek(_jNearPosition, SeekOrigin.Begin);

            if (_jNearValue != default)
            {
                Assembler.Cbnz(_jNearValue, checked((int)offset));
                _jNearValue = default;
            }
            else
            {
                Assembler.B(_jNearCondition, checked((int)offset));
            }

            _stream.Seek(currentPosition, SeekOrigin.Begin);
        }

        // Reserves space for a literal load of "value" into rt. The load itself is written by
        // WriteConstantPool once the pool position is known. Equal values share one pool slot.
        public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
        {
            if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
            {
                cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
                _constantPool.Add(value, cpe);
            }

            // Checked, like every other offset conversion in this class, so an oversized
            // stream fails loudly instead of recording a truncated offset.
            cpe.LdrOffsets.Add((rt, checked((int)_stream.Position)));
            _stream.Seek(LdrLitInstLength, SeekOrigin.Current);
        }

        // Appends the constant pool at the current position and back-patches every reserved
        // literal load. Runs once; later calls return the offset recorded by the first call.
        private long WriteConstantPool()
        {
            if (_constantPoolWritten)
            {
                return _constantPoolOffset;
            }

            long constantPoolBaseOffset = _stream.Position;

            foreach (ulong value in _constantPool.Keys)
            {
                WriteUInt64(value);
            }

            foreach (ConstantPoolEntry cpe in _constantPool.Values)
            {
                foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
                {
                    _stream.Seek(ldrOffset, SeekOrigin.Begin);

                    int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
                    int pcRelativeOffset = absoluteOffset - ldrOffset;

                    Assembler.LdrLit(rt, pcRelativeOffset);
                }
            }

            // Leave the stream positioned right after the pool.
            _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);

            _constantPoolOffset = constantPoolBaseOffset;
            _constantPoolWritten = true;

            return constantPoolBaseOffset;
        }

        // Finalizes the constant pool and returns the emitted bytes together with relocation
        // entries for pool slots that carry a symbol (only when the context is relocatable).
        public (byte[], RelocInfo) GetCode()
        {
            long constantPoolBaseOffset = WriteConstantPool();

            byte[] code = new byte[_stream.Length];

            long originalPosition = _stream.Position;

            _stream.Seek(0, SeekOrigin.Begin);
            _stream.Read(code, 0, code.Length);
            _stream.Seek(originalPosition, SeekOrigin.Begin);

            RelocInfo relocInfo;

            if (_relocatable)
            {
                RelocEntry[] relocs = new RelocEntry[_constantPool.Count];

                int index = 0;

                foreach (ConstantPoolEntry cpe in _constantPool.Values)
                {
                    if (cpe.Symbol.Type != SymbolType.None)
                    {
                        int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
                        relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
                    }
                }

                // Entries without a symbol were skipped; trim the unused tail.
                if (index != relocs.Length)
                {
                    Array.Resize(ref relocs, index);
                }

                relocInfo = new RelocInfo(relocs);
            }
            else
            {
                relocInfo = new RelocInfo(Array.Empty<RelocEntry>());
            }

            return (code, relocInfo);
        }

        // Writes the value to the stream, least significant byte first.
        private void WriteUInt64(ulong value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
            _stream.WriteByte((byte)(value >> 16));
            _stream.WriteByte((byte)(value >> 24));
            _stream.WriteByte((byte)(value >> 32));
            _stream.WriteByte((byte)(value >> 40));
            _stream.WriteByte((byte)(value >> 48));
            _stream.WriteByte((byte)(value >> 56));
        }
    }
}
// \ No newline at end of file
// diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
// new file mode 100644
// index 00000000..704aa45a
// --- /dev/null
// +++ b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
// @@ -0,0 +1,1576 @@
using ARMeilleure.CodeGen.Linking;
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.CodeGen.Unwinding;
using ARMeilleure.Common;
using ARMeilleure.Diagnostics;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;

using static ARMeilleure.IntermediateRepresentation.Operand;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.CodeGen.Arm64
{
    // Turns a control flow graph of IR operations into Arm64 machine code.
    static class CodeGenerator
    {
        private const int DWordScale = 3;

        private const int RegistersCount = 32;

        private const int FpRegister = 29;
        private const int LrRegister = 30;
        private const int SpRegister = 31;
        private const int ZrRegister = 31;

        private enum AccessSize
        {
            Byte,
            Hword,
            Auto
        }

        // Emitter for each IR instruction, indexed by the Instruction enum value.
        private static Action<CodeGenContext, Operation>[] _instTable;

        static CodeGenerator()
        {
            _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];

            Add(Instruction.Add, GenerateAdd);
            Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
            Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
            Add(Instruction.BitwiseNot, GenerateBitwiseNot);
            Add(Instruction.BitwiseOr, GenerateBitwiseOr);
            Add(Instruction.BranchIf, GenerateBranchIf);
            Add(Instruction.ByteSwap, GenerateByteSwap);
            Add(Instruction.Call, GenerateCall);
            //Add(Instruction.Clobber, GenerateClobber);
            Add(Instruction.Compare, GenerateCompare);
            Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
            Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
            Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
            Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
            Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
            Add(Instruction.ConvertToFP, GenerateConvertToFP);
            Add(Instruction.ConvertToFPUI, GenerateConvertToFPUI);
            Add(Instruction.Copy, GenerateCopy);
            Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
            Add(Instruction.Divide, GenerateDivide);
            Add(Instruction.DivideUI, GenerateDivideUI);
            Add(Instruction.Fill, GenerateFill);
            Add(Instruction.Load, GenerateLoad);
            Add(Instruction.Load16, GenerateLoad16);
            Add(Instruction.Load8, GenerateLoad8);
            Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
            Add(Instruction.Multiply, GenerateMultiply);
            Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
            Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
            Add(Instruction.Negate, GenerateNegate);
            Add(Instruction.Return, GenerateReturn);
            Add(Instruction.RotateRight, GenerateRotateRight);
            Add(Instruction.ShiftLeft, GenerateShiftLeft);
            Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
            Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
            Add(Instruction.SignExtend16, GenerateSignExtend16);
            Add(Instruction.SignExtend32, GenerateSignExtend32);
            Add(Instruction.SignExtend8, GenerateSignExtend8);
            Add(Instruction.Spill, GenerateSpill);
            Add(Instruction.SpillArg, GenerateSpillArg);
            Add(Instruction.StackAlloc, GenerateStackAlloc);
            Add(Instruction.Store, GenerateStore);
            Add(Instruction.Store16, GenerateStore16);
            Add(Instruction.Store8, GenerateStore8);
            Add(Instruction.Subtract, GenerateSubtract);
            Add(Instruction.Tailcall, GenerateTailcall);
            Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
            Add(Instruction.VectorExtract, GenerateVectorExtract);
            Add(Instruction.VectorExtract16, GenerateVectorExtract16);
            Add(Instruction.VectorExtract8, GenerateVectorExtract8);
            Add(Instruction.VectorInsert, GenerateVectorInsert);
            Add(Instruction.VectorInsert16, GenerateVectorInsert16);
            Add(Instruction.VectorInsert8, GenerateVectorInsert8);
            Add(Instruction.VectorOne, GenerateVectorOne);
            Add(Instruction.VectorZero, GenerateVectorZero);
            Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
            Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
            Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
            Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
            Add(Instruction.ZeroExtend8, GenerateZeroExtend8);

            // Registers the emitter for one instruction in the dispatch table.
            static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
            {
                _instTable[(int)inst] = func;
            }
        }

        // Runs the optimization, pre-allocation and register allocation passes over the CFG,
        // then emits code block by block and returns it with its unwind and relocation info.
        public static CompiledFunction Generate(CompilerContext cctx)
        {
            ControlFlowGraph cfg = cctx.Cfg;

            Logger.StartPass(PassName.Optimization);

            if (cctx.Options.HasFlag(CompilerOptions.Optimize))
            {
                if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
                {
                    Optimizer.RunPass(cfg);
                }

                BlockPlacement.RunPass(cfg);
            }

            Arm64Optimizer.RunPass(cfg);

            Logger.EndPass(PassName.Optimization, cfg);

            Logger.StartPass(PassName.PreAllocation);

            StackAllocator stackAlloc = new();

            PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);

            Logger.EndPass(PassName.PreAllocation, cfg);

            Logger.StartPass(PassName.RegisterAllocation);

            if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
            {
                Ssa.Deconstruct(cfg);
            }

            // Linear scan when requested through the options, the hybrid allocator otherwise.
            IRegisterAllocator regAlloc = cctx.Options.HasFlag(CompilerOptions.Lsra)
                ? (IRegisterAllocator)new LinearScanAllocator()
                : new HybridAllocator();

            RegisterMasks regMasks = new(
                CallingConvention.GetIntAvailableRegisters(),
                CallingConvention.GetVecAvailableRegisters(),
                CallingConvention.GetIntCallerSavedRegisters(),
                CallingConvention.GetVecCallerSavedRegisters(),
                CallingConvention.GetIntCalleeSavedRegisters(),
                CallingConvention.GetVecCalleeSavedRegisters(),
                RegistersCount);

            AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);

            Logger.EndPass(PassName.RegisterAllocation, cfg);

            Logger.StartPass(PassName.CodeGeneration);

            //Console.Error.WriteLine(IRDumper.GetDump(cfg));

            bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;

            CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);

            UnwindInfo unwindInfo = WritePrologue(context);

            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
            {
                context.EnterBlock(block);

                // GenerateOperation returns the next node to process (it may skip one).
                Operation node = block.Operations.First;

                while (node != default)
                {
                    node = GenerateOperation(context, node);
                }

                if (block.SuccessorsCount != 0)
                {
                    BasicBlock succ = block.GetSuccessor(0);

                    // A jump is only needed when the successor is not the next block in layout order.
                    if (succ != block.ListNext)
                    {
                        context.JumpTo(succ);
                    }

                    continue;
                }

                // The only blocks which can have 0 successors are exit blocks.
                Operation last = block.Operations.Last;

                Debug.Assert(last.Instruction == Instruction.Tailcall ||
                             last.Instruction == Instruction.Return);
            }

            (byte[] code, RelocInfo relocInfo) = context.GetCode();

            Logger.EndPass(PassName.CodeGeneration);

            return new CompiledFunction(code, unwindInfo, relocInfo);
        }

        // Emits code for one operation and returns the next operation to process.
        // Throws ArgumentException when the instruction has no registered emitter.
        private static Operation GenerateOperation(CodeGenContext context, Operation operation)
        {
            if (operation.Instruction == Instruction.Extended)
            {
                CodeGeneratorIntrinsic.GenerateOperation(context, operation);

                return operation.ListNext;
            }

            bool paired =
                IsLoadOrStore(operation) &&
                operation.ListNext != default &&
                operation.ListNext.Instruction == operation.Instruction &&
                TryPairMemoryOp(context, operation, operation.ListNext);

            if (paired)
            {
                // Skip next operation if we managed to pair them.
                return operation.ListNext.ListNext;
            }

            Action<CodeGenContext, Operation> emitter = _instTable[(int)operation.Instruction];

            if (emitter == null)
            {
                throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
            }

            emitter(context, operation);

            return operation.ListNext;
        }

        // Integer add for integer destinations, scalar floating point add otherwise.
        private static void GenerateAdd(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            // ValidateBinOp(dest, src1, src2);

            if (!destination.Type.IsInteger())
            {
                context.Assembler.FaddScalar(destination, lhs, rhs);

                return;
            }

            context.Assembler.Add(destination, lhs, rhs);
        }

        private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            ValidateBinOp(destination, lhs, rhs);

            Debug.Assert(destination.Type.IsInteger());

            context.Assembler.And(destination, lhs, rhs);
        }

        // Integer EOR for integer destinations, vector EOR otherwise.
        private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            ValidateBinOp(destination, lhs, rhs);

            if (!destination.Type.IsInteger())
            {
                context.Assembler.EorVector(destination, lhs, rhs);

                return;
            }

            context.Assembler.Eor(destination, lhs, rhs);
        }

        private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand operand = operation.GetSource(0);

            ValidateUnOp(destination, operand);

            Debug.Assert(destination.Type.IsInteger());

            context.Assembler.Mvn(destination, operand);
        }

        private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            ValidateBinOp(destination, lhs, rhs);

            Debug.Assert(destination.Type.IsInteger());

            context.Assembler.Orr(destination, lhs, rhs);
        }

        // Compares sources 0 and 1, then branches to successor 1 of the current block when the
        // comparison kind held in source 2 is satisfied.
        private static void GenerateBranchIf(CodeGenContext context, Operation operation)
        {
            Operand comparison = operation.GetSource(2);

            Debug.Assert(comparison.Kind == OperandKind.Constant);

            ArmCondition condition = ((Comparison)comparison.AsInt32()).ToArmCondition();

            GenerateCompareCommon(context, operation);

            context.JumpTo(condition, context.CurrBlock.GetSuccessor(1));
        }

        private static void GenerateByteSwap(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand operand = operation.GetSource(0);

            ValidateUnOp(destination, operand);

            Debug.Assert(destination.Type.IsInteger());

            context.Assembler.Rev(destination, operand);
        }

        // Branch with link to the register held in source 0.
        private static void GenerateCall(CodeGenContext context, Operation operation)
        {
            Operand callee = operation.GetSource(0);

            context.Assembler.Blr(callee);
        }

        // Compares sources 0 and 1, then sets the destination according to the comparison
        // kind held in source 2.
        private static void GenerateCompare(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand comparison = operation.GetSource(2);

            Debug.Assert(destination.Type == OperandType.I32);
            Debug.Assert(comparison.Kind == OperandKind.Constant);

            ArmCondition condition = ((Comparison)comparison.AsInt32()).ToArmCondition();

            GenerateCompareCommon(context, operation);

            context.Assembler.Cset(destination, condition);
        }

        private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
        {
            if (operation.SourcesCount != 5)
            {
                Operand actual = operation.GetDestination(0);
                Operand result = operation.GetDestination(1);
                Operand address = operation.GetSource(0);
                Operand expected = operation.GetSource(1);
                Operand desired = operation.GetSource(2);

                GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto);

                return;
            }

            // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
            Operand actualLow = operation.GetDestination(0);
            Operand actualHigh = operation.GetDestination(1);
            Operand temp0 = operation.GetDestination(2);
            Operand temp1 = operation.GetDestination(3);
            Operand wideAddress = operation.GetSource(0);
            Operand expectedLow = operation.GetSource(1);
            Operand expectedHigh = operation.GetSource(2);
            Operand desiredLow = operation.GetSource(3);
            Operand desiredHigh = operation.GetSource(4);

            GenerateAtomicDcas(
                context,
                wideAddress,
                expectedLow,
                expectedHigh,
                desiredLow,
                desiredHigh,
                actualLow,
                actualHigh,
                temp0,
                temp1);
        }

        private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
        {
            Operand observed = operation.GetDestination(0);
            Operand status = operation.GetDestination(1);
            Operand location = operation.GetSource(0);
            Operand comparand = operation.GetSource(1);
            Operand replacement = operation.GetSource(2);

            GenerateAtomicCas(context, location, comparand, replacement, observed, status, AccessSize.Hword);
        }

        private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
        {
            Operand observed = operation.GetDestination(0);
            Operand status = operation.GetDestination(1);
            Operand location = operation.GetSource(0);
            Operand comparand = operation.GetSource(1);
            Operand replacement = operation.GetSource(2);

            GenerateAtomicCas(context, location, comparand, replacement, observed, status, AccessSize.Byte);
        }

        // Emits the CMP shared by BranchIf and Compare, using sources 0 and 1.
        private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
        {
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            EnsureSameType(lhs, rhs);

            Debug.Assert(lhs.Type.IsInteger());

            context.Assembler.Cmp(lhs, rhs);
        }

        // Selects source 1 when source 0 is non-zero, source 2 otherwise.
        private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand selector = operation.GetSource(0);
            Operand whenTrue = operation.GetSource(1);
            Operand whenFalse = operation.GetSource(2);

            EnsureSameType(destination, whenTrue, whenFalse);

            Debug.Assert(destination.Type.IsInteger());
            Debug.Assert(selector.Type == OperandType.I32);

            context.Assembler.Cmp(selector, Const(selector.Type, 0));
            context.Assembler.Csel(destination, whenTrue, whenFalse, ArmCondition.Ne);
        }

        // Moves the source, viewed as an I32 register, into the destination.
        private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand wide = operation.GetSource(0);

            Debug.Assert(destination.Type == OperandType.I32 && wide.Type == OperandType.I64);

            context.Assembler.Mov(destination, Register(wide, OperandType.I32));
        }

        // SCVTF for integer sources, FCVT for floating point sources.
        private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand operand = operation.GetSource(0);

            Debug.Assert(destination.Type == OperandType.FP32 || destination.Type == OperandType.FP64);
            Debug.Assert(destination.Type != operand.Type);
            Debug.Assert(operand.Type != OperandType.V128);

            if (!operand.Type.IsInteger())
            {
                context.Assembler.FcvtScalar(destination, operand);

                return;
            }

            context.Assembler.ScvtfScalar(destination, operand);
        }

        private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand operand = operation.GetSource(0);

            Debug.Assert(destination.Type == OperandType.FP32 || destination.Type == OperandType.FP64);
            Debug.Assert(destination.Type != operand.Type);
            Debug.Assert(operand.Type.IsInteger());

            context.Assembler.UcvtfScalar(destination, operand);
        }

        // Copies source to destination. Constants going into a register are either reserved in
        // the constant pool (relocatable ones) or materialized through GenerateConstantCopy.
        private static void GenerateCopy(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand origin = operation.GetSource(0);

            EnsureSameType(destination, origin);

            Debug.Assert(destination.Type.IsInteger() || origin.Kind != OperandKind.Constant);

            // Moves to the same register are useless.
            if (destination.Kind == origin.Kind && destination.Value == origin.Value)
            {
                return;
            }

            bool constantToRegister =
                destination.Kind == OperandKind.Register &&
                origin.Kind == OperandKind.Constant;

            if (!constantToRegister)
            {
                context.Assembler.Mov(destination, origin);

                return;
            }

            if (origin.Relocatable)
            {
                context.ReserveRelocatableConstant(destination, origin.Symbol, origin.Value);
            }
            else
            {
                GenerateConstantCopy(context, destination, origin.Value);
            }
        }

        private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand operand = operation.GetSource(0);

            EnsureSameType(destination, operand);

            Debug.Assert(destination.Type.IsInteger());

            context.Assembler.Clz(destination, operand);
        }

        // Signed integer divide for integer destinations, scalar floating point divide otherwise.
        private static void GenerateDivide(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand numerator = operation.GetSource(0);
            Operand denominator = operation.GetSource(1);

            ValidateBinOp(destination, numerator, denominator);

            if (!destination.Type.IsInteger())
            {
                context.Assembler.FdivScalar(destination, numerator, denominator);

                return;
            }

            context.Assembler.Sdiv(destination, numerator, denominator);
        }

        private static void GenerateDivideUI(CodeGenContext context, Operation operation)
        {
            Operand destination = operation.Destination;
            Operand numerator = operation.GetSource(0);
            Operand denominator = operation.GetSource(1);

            ValidateBinOp(destination, numerator, denominator);

            context.Assembler.Udiv(destination, numerator, denominator);
        }

        private static void GenerateLoad(CodeGenContext context, Operation operation)
        {
            Operand loaded = operation.Destination;
            Operand location = operation.GetSource(0);

            context.Assembler.Ldr(loaded, location);
        }

        // NOTE: the chunk ends inside this method; the visible part is kept unchanged.
        private static void GenerateLoad16(CodeGenContext context, Operation operation)
        {
            Operand value = operation.Destination;
            Operand address = operation.GetSource(0);
+ + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.LdrhRiUn(value, address, 0); + } + + private static void GenerateLoad8(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.LdrbRiUn(value, address, 0); + } + + private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation) + { + context.Assembler.Dmb(0xf); + } + + private static void GenerateMultiply(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Mul(dest, src1, src2); + } + else + { + context.Assembler.FmulScalar(dest, src1, src2); + } + } + + private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + Debug.Assert(dest.Type == OperandType.I64); + + context.Assembler.Smulh(dest, src1, src2); + } + + private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + Debug.Assert(dest.Type == OperandType.I64); + + context.Assembler.Umulh(dest, src1, src2); + } + + private static void GenerateNegate(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + if (dest.Type.IsInteger()) + { + context.Assembler.Neg(dest, source); + } + else + { + context.Assembler.FnegScalar(dest, source); + } + } + + private static void 
GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset)
        {
            // Loads value from address + offset. The addressing form is picked by offset range:
            // LdrRiUn when ConstFitsOnUImm12 accepts the offset for this value type, Ldur when
            // ConstFitsOnSImm9 accepts it, otherwise the address is first built in the reserved register.
            if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
            {
                context.Assembler.LdrRiUn(value, address, offset);
            }
            else if (CodeGenCommon.ConstFitsOnSImm9(offset))
            {
                context.Assembler.Ldur(value, address, offset);
            }
            else
            {
                Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
                GenerateConstantCopy(context, tempAddress, (ulong)offset);
                context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
                context.Assembler.LdrRiUn(value, tempAddress, 0);
            }
        }

        // Writes the epilogue and then emits Ret through LrRegister.
        private static void GenerateReturn(CodeGenContext context, Operation operation)
        {
            WriteEpilogue(context);

            context.Assembler.Ret(Register(LrRegister));
        }

        // dest = src1 rotated right by src2 (Ror).
        private static void GenerateRotateRight(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand src1 = operation.GetSource(0);
            Operand src2 = operation.GetSource(1);

            ValidateShift(dest, src1, src2);

            context.Assembler.Ror(dest, src1, src2);
        }

        // dest = src1 shifted left by src2 (Lsl).
        private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand src1 = operation.GetSource(0);
            Operand src2 = operation.GetSource(1);

            ValidateShift(dest, src1, src2);

            context.Assembler.Lsl(dest, src1, src2);
        }

        // dest = src1 shifted right by src2, signed variant (Asr).
        private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand src1 = operation.GetSource(0);
            Operand src2 = operation.GetSource(1);

            ValidateShift(dest, src1, src2);

            context.Assembler.Asr(dest, src1, src2);
        }

        // dest = src1 shifted right by src2, unsigned variant (Lsr).
        private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand src1 = operation.GetSource(0);
            Operand src2 = operation.GetSource(1);

            ValidateShift(dest, src1, src2);

            context.Assembler.Lsr(dest, src1, src2);
        }

        // Sign-extends the low 16 bits of source into dest (Sxth). Both operands must be integers.
        private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand source = operation.GetSource(0);

            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());

            context.Assembler.Sxth(dest, source);
        }

        // Sign-extends the low 32 bits of source into dest (Sxtw). Both operands must be integers.
        private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand source = operation.GetSource(0);

            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());

            context.Assembler.Sxtw(dest, source);
        }

        // Sign-extends the low 8 bits of source into dest (Sxtb). Both operands must be integers.
        private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand source = operation.GetSource(0);

            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());

            context.Assembler.Sxtb(dest, source);
        }

        // Reloads a value from the stack. The constant in source 0 is rebased by the
        // call-arguments region size and the FP/LR save region size before being applied to SP.
        private static void GenerateFill(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand offset = operation.GetSource(0);

            Debug.Assert(offset.Kind == OperandKind.Constant);

            int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;

            GenerateLoad(context, dest, Register(SpRegister), offs);
        }

        // Store counterpart of the offset-based GenerateLoad: StrRiUn, then Stur, then an
        // address built in the reserved register, depending on which range check accepts the offset.
        private static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset)
        {
            if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
            {
                context.Assembler.StrRiUn(value, address, offset);
            }
            else if (CodeGenCommon.ConstFitsOnSImm9(offset))
            {
                context.Assembler.Stur(value, address, offset);
            }
            else
            {
                Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
                GenerateConstantCopy(context, tempAddress, (ulong)offset);
                context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
                context.Assembler.StrRiUn(value, tempAddress, 0);
            }
        }

        // Spill whose offset is rebased past the call-arguments and FP/LR save regions.
        private static void GenerateSpill(CodeGenContext context, Operation operation)
        {
            GenerateSpill(context, operation, context.CallArgsRegionSize + context.FpLrSaveRegionSize);
        }

        // Spill whose offset is used as-is (base offset 0).
        private static void GenerateSpillArg(CodeGenContext context, Operation operation)
        {
            GenerateSpill(context, operation, 0);
        }

        // dest = SP + (constant in source 0 + CallArgsRegionSize + FpLrSaveRegionSize).
        private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
        {
            Operand dest = operation.Destination;
            Operand offset = operation.GetSource(0);

            Debug.Assert(offset.Kind == OperandKind.Constant);

            int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;

            if (CodeGenCommon.ConstFitsOnUImm12(offs))
            {
                context.Assembler.Add(dest, Register(SpRegister), Const(dest.Type, offs));
            }
            else
            {
                // The offset is computed here from region sizes, so nothing earlier has checked that it
                // fits an ADD immediate. Use the same fallback as the offset-based load/store helpers and
                // the prologue: materialize the constant, then add it with SP as the first input.
                // dest is about to be overwritten anyway, so it doubles as the scratch register.
                GenerateConstantCopy(context, dest, (ulong)offs);
                context.Assembler.Add(dest, Register(SpRegister), dest, ArmExtensionType.Uxtx);
            }
        }

        // Stores source 1 (value) to source 0 (address) with Str.
        private static void GenerateStore(CodeGenContext context, Operation operation)
        {
            Operand value = operation.GetSource(1);
            Operand address = operation.GetSource(0);

            context.Assembler.Str(value, address);
        }

        // Stores the integer in source 1 to the address in source 0 with StrhRiUn, offset 0.
        private static void GenerateStore16(CodeGenContext context, Operation operation)
        {
            Operand value = operation.GetSource(1);
            Operand address = operation.GetSource(0);

            Debug.Assert(value.Type.IsInteger());

            context.Assembler.StrhRiUn(value, address, 0);
        }

        // Stores the integer in source 1 to the address in source 0 with StrbRiUn, offset 0.
        private static void GenerateStore8(CodeGenContext context, Operation operation)
        {
            Operand value = operation.GetSource(1);
            Operand address = operation.GetSource(0);

            Debug.Assert(value.Type.IsInteger());

            context.Assembler.StrbRiUn(value, address, 0);
        }

        // Stores source 1 at SP + (constant in source 0 + baseOffset).
        private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
        {
            Operand offset = operation.GetSource(0);
            Operand source = operation.GetSource(1);

            Debug.Assert(offset.Kind == OperandKind.Constant);

            int offs = offset.AsInt32() + baseOffset;

            GenerateStore(context, source, Register(SpRegister), offs);
        }

        private static void GenerateSubtract(CodeGenContext context, Operation operation)
        {
Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + // ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sub(dest, src1, src2); + } + else + { + context.Assembler.FsubScalar(dest, src1, src2); + } + } + + private static void GenerateTailcall(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Br(operation.GetSource(0)); + } + + private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + if (dest != default) + { + Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger()); + + OperandType destType = source.Type == OperandType.I64 ? OperandType.FP64 : OperandType.FP32; + + context.Assembler.Fmov(Register(dest, destType), source, topHalf: false); + } + } + + private static void GenerateVectorExtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes()); + + if (dest.Type.IsInteger()) + { + context.Assembler.Umov(dest, src1, index, dest.Type == OperandType.I64 ? 3 : 2); + } + else + { + context.Assembler.DupScalar(dest, src1, index, dest.Type == OperandType.FP64 ? 
3 : 2); + } + } + + private static void GenerateVectorExtract16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 8); + + context.Assembler.Umov(dest, src1, index, 1); + } + + private static void GenerateVectorExtract8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 16); + + context.Assembler.Umov(dest, src1, index, 0); + } + + private static void GenerateVectorInsert(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + if (src2.Type.IsInteger()) + { + context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2); + } + else + { + context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 
3 : 2); + } + } + + private static void GenerateVectorInsert16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Ins(dest, src2, index, 1); + } + + private static void GenerateVectorInsert8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Ins(dest, src2, index, 0); + } + + private static void GenerateVectorOne(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.CmeqVector(dest, dest, dest, 2); + } + + private static void GenerateVectorZero(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.EorVector(dest, dest, dest); + } + + private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + context.Assembler.Fmov(Register(dest, OperandType.FP64), Register(source, OperandType.FP64)); + } + + private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand 
source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + context.Assembler.Fmov(Register(dest, OperandType.FP32), Register(source, OperandType.FP32)); + } + + private static void GenerateZeroExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Uxth(dest, source); + } + + private static void GenerateZeroExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + // We can eliminate the move if source is already 32-bit and the registers are the same. + if (dest.Value == source.Value && source.Type == OperandType.I32) + { + return; + } + + context.Assembler.Mov(Register(dest.GetRegister().Index, OperandType.I32), source); + } + + private static void GenerateZeroExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Uxtb(dest, source); + } + + private static UnwindInfo WritePrologue(CodeGenContext context) + { + List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>(); + + Operand rsp = Register(SpRegister); + + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask); + int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask); + + int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8); + + int offset = 0; + + 
WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64); + WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64); + + int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize); + int outArgsSize = context.CallArgsRegionSize; + + if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale)) + { + if (context.HasCall) + { + context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize); + context.Assembler.MovSp(Register(FpRegister), rsp); + } + + if (outArgsSize != 0) + { + context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize)); + } + } + else + { + int frameSize = localSize + outArgsSize; + if (frameSize != 0) + { + if (CodeGenCommon.ConstFitsOnUImm12(frameSize)) + { + context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize)); + } + else + { + Operand tempSize = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempSize, (ulong)frameSize); + context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx); + } + } + + context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize); + + if (outArgsSize != 0) + { + context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize)); + } + else + { + context.Assembler.MovSp(Register(FpRegister), Register(SpRegister)); + } + } + + return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset); + } + + private static void WritePrologueCalleeSavesPreIndexed( + CodeGenContext context, + List<UnwindPushEntry> pushEntries, + ref int mask, + ref int offset, + int calleeSaveRegionSize, + OperandType type) + { + if ((BitOperations.PopCount((uint)mask) & 1) != 0) + { + int reg = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg)); + + mask &= ~(1 << 
reg); + + if (offset != 0) + { + context.Assembler.StrRiUn(Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.StrRiPre(Register(reg, type), Register(SpRegister), -calleeSaveRegionSize); + } + + offset += type.GetSizeInBytes(); + } + + while (mask != 0) + { + int reg = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg)); + + mask &= ~(1 << reg); + + int reg2 = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg2)); + + mask &= ~(1 << reg2); + + if (offset != 0) + { + context.Assembler.StpRiUn(Register(reg, type), Register(reg2, type), Register(SpRegister), offset); + } + else + { + context.Assembler.StpRiPre(Register(reg, type), Register(reg2, type), Register(SpRegister), -calleeSaveRegionSize); + } + + offset += type.GetSizeInBytes() * 2; + } + } + + private static void WriteEpilogue(CodeGenContext context) + { + Operand rsp = Register(SpRegister); + + int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize); + int outArgsSize = context.CallArgsRegionSize; + + if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale)) + { + if (outArgsSize != 0) + { + context.Assembler.Add(rsp, rsp, Const(OperandType.I64, outArgsSize)); + } + + if (context.HasCall) + { + context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, localSize); + } + } + else + { + if (context.HasCall) + { + context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize); + } + + int frameSize = localSize + outArgsSize; + if (frameSize != 0) + { + if (CodeGenCommon.ConstFitsOnUImm12(frameSize)) + { + context.Assembler.Add(rsp, rsp, Const(OperandType.I64, frameSize)); + } + else + { + Operand tempSize = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempSize, (ulong)frameSize); + 
context.Assembler.Add(rsp, rsp, tempSize, ArmExtensionType.Uxtx); + } + } + } + + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask); + int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask); + + int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8; + int calleeSaveRegionSize = Align16(offset); + + WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64); + WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64); + } + + private static void WriteEpilogueCalleeSavesPostIndexed( + CodeGenContext context, + ref int mask, + ref int offset, + int calleeSaveRegionSize, + OperandType type) + { + while (mask != 0) + { + int reg = BitUtils.HighestBitSet(mask); + + mask &= ~(1 << reg); + + if (mask != 0) + { + int reg2 = BitUtils.HighestBitSet(mask); + + mask &= ~(1 << reg2); + + offset -= type.GetSizeInBytes() * 2; + + if (offset != 0) + { + context.Assembler.LdpRiUn(Register(reg2, type), Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.LdpRiPost(Register(reg2, type), Register(reg, type), Register(SpRegister), calleeSaveRegionSize); + } + } + else + { + offset -= type.GetSizeInBytes(); + + if (offset != 0) + { + context.Assembler.LdrRiUn(Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.LdrRiPost(Register(reg, type), Register(SpRegister), calleeSaveRegionSize); + } + } + } + } + + private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value) + { + if (value != 0) + { + int hw = 0; + bool first = true; + + while (value != 0) + { + int valueLow = (ushort)value; + if (valueLow != 0) + { + if (first) + { + 
context.Assembler.Movz(dest, valueLow, hw); + first = false; + } + else + { + context.Assembler.Movk(dest, valueLow, hw); + } + } + + hw++; + value >>= 16; + } + } + else + { + context.Assembler.Mov(dest, Register(ZrRegister, dest.Type)); + } + } + + private static void GenerateAtomicCas( + CodeGenContext context, + Operand address, + Operand expected, + Operand desired, + Operand actual, + Operand result, + AccessSize accessSize) + { + int startOffset = context.StreamOffset; + + switch (accessSize) + { + case AccessSize.Byte: + context.Assembler.Ldaxrb(actual, address); + break; + case AccessSize.Hword: + context.Assembler.Ldaxrh(actual, address); + break; + default: + context.Assembler.Ldaxr(actual, address); + break; + } + + context.Assembler.Cmp(actual, expected); + + context.JumpToNear(ArmCondition.Ne); + + switch (accessSize) + { + case AccessSize.Byte: + context.Assembler.Stlxrb(desired, address, result); + break; + case AccessSize.Hword: + context.Assembler.Stlxrh(desired, address, result); + break; + default: + context.Assembler.Stlxr(desired, address, result); + break; + } + + context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed. 
context.JumpHere();

            context.Assembler.Clrex();
        }

        // 128-bit compare-and-swap loop built from Ldaxp/Stlxp.
        // actualLow/actualHigh receive the loaded pair; temp0/temp1 are scratch registers used for
        // the comparison, and temp0 (viewed as I32) also receives the Stlxp status.
        private static void GenerateAtomicDcas(
            CodeGenContext context,
            Operand address,
            Operand expectedLow,
            Operand expectedHigh,
            Operand desiredLow,
            Operand desiredHigh,
            Operand actualLow,
            Operand actualHigh,
            Operand temp0,
            Operand temp1)
        {
            int startOffset = context.StreamOffset;

            // temp0 ends up non-zero when either half differs from the expected value.
            context.Assembler.Ldaxp(actualLow, actualHigh, address);
            context.Assembler.Eor(temp0, actualHigh, expectedHigh);
            context.Assembler.Eor(temp1, actualLow, expectedLow);
            context.Assembler.Orr(temp0, temp1, temp0);

            context.JumpToNearIfNotZero(temp0);

            Operand result = Register(temp0, OperandType.I32);

            context.Assembler.Stlxp(desiredLow, desiredHigh, address, result);
            context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.

            context.JumpHere();

            context.Assembler.Clrex();
        }

        // Tries to emit one LdpRiUn/StpRiUn in place of two consecutive memory operations.
        // Returns true when the pair instruction was emitted; nothing is emitted when it returns false.
        private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp)
        {
            // A pair instruction performs two accesses of the same kind, so a load followed by
            // a store (or by a narrower Load16/Load8) can never be fused.
            if (currentOp.Instruction != nextOp.Instruction)
            {
                return false;
            }

            if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset))
            {
                return false;
            }

            if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset))
            {
                return false;
            }

            if (op1Base != op2Base)
            {
                return false;
            }

            OperandType valueType = GetMemOpValueType(currentOp);

            // Both accesses must have the same type and be exactly adjacent in memory.
            if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset)
            {
                return false;
            }

            if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2()))
            {
                return false;
            }

            if (currentOp.Instruction == Instruction.Load)
            {
                Operand firstDest = currentOp.Destination;
                Operand secondDest = nextOp.Destination;

                // If the first load overwrites the base register, the second load was meant to read
                // through the freshly loaded value, so the two accesses do not share an address.
                // Loading both halves into the same register is not a valid pair load either.
                if (RefersToSameRegister(firstDest, op1Base) || RefersToSameRegister(firstDest, secondDest))
                {
                    return false;
                }

                context.Assembler.LdpRiUn(firstDest, secondDest, op1Base, op1Offset);
            }
            else if (currentOp.Instruction == Instruction.Store)
            {
                context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset);
            }
            else
            {
                return false;
            }

            return true;
        }

        // Same kind/value comparison GenerateCopy uses to detect a move to the same register.
        private static bool RefersToSameRegister(Operand op1, Operand op2)
        {
            return op1.Kind == op2.Kind && op1.Value == op2.Value;
        }

        private static bool IsLoadOrStore(Operation operation)
        {
            return operation.Instruction == Instruction.Load || operation.Instruction == Instruction.Store;
        }

        // Type of the value moved by a memory operation: the destination type when there is
        // a destination, otherwise the type of source 1.
        private static OperandType GetMemOpValueType(Operation operation)
        {
            if (operation.Destination != default)
            {
                return operation.Destination.Type;
            }

            return operation.GetSource(1).Type;
        }

        // Extracts base and displacement from source 0 when it is a memory operand of the plain
        // "base + displacement" form. Returns false (with default outputs) for anything else.
        private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset)
        {
            baseAddress = default;
            offset = 0;
            Operand address = operation.GetSource(0);

            if (address.Kind != OperandKind.Memory)
            {
                return false;
            }

            MemoryOperand memOp = address.GetMemory();

            // A missing base or a present index both disqualify the operand. This is the exact
            // condition the previous, longer form reduced to: its "use the index as base" path
            // always fell into the index check right after it and returned false.
            if (memOp.BaseAddress == default || memOp.Index != default)
            {
                return false;
            }

            baseAddress = memOp.BaseAddress;
            offset = memOp.Displacement;

            return true;
        }

        // Register with the same index as the given operand, retyped to the requested operand type.
        private static Operand Register(Operand operand, OperandType type = OperandType.I64)
        {
            return Register(operand.GetRegister().Index, type);
        }

        // Register operand for the given index, created through Factory.Register with RegisterType.Integer.
        private static Operand Register(int register, OperandType type = OperandType.I64)
        {
            return Factory.Register(register, RegisterType.Integer, type);
        }

        // Rounds value up to the next multiple of 16.
        private static int Align16(int value)
        {
            return (value + 0xf) & ~0xf;
        }

        [Conditional("DEBUG")]
        private static void ValidateUnOp(Operand dest, Operand source)
        {
            // Destination and source aren't forced to be equal, so only the types are checked
            // (EnsureSameReg is intentionally not called here).
            EnsureSameType(dest, source);
        }

        [Conditional("DEBUG")]
        private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
        {
            // Destination and source aren't forced to be equal, so only the types are checked
            // (EnsureSameReg is intentionally not called here).
            EnsureSameType(dest, src1, src2);
        }

        [Conditional("DEBUG")]
        private static void ValidateShift(Operand dest, Operand src1, Operand src2)
        {
            // Destination and source aren't forced to be equal, so only the types are checked
            // (EnsureSameReg is intentionally not called here).
            EnsureSameType(dest, src1);

            Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
        }

        // Asserts that both operands are the same register or memory operand.
        private static void EnsureSameReg(Operand op1, Operand op2)
        {
            Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
            Debug.Assert(op1.Kind == op2.Kind);
            Debug.Assert(op1.Value == op2.Value);
        }

        // Asserts that the operands share one operand type.
        private static void EnsureSameType(Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == op2.Type);
        }

        private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
        {
            Debug.Assert(op1.Type == op2.Type);
            Debug.Assert(op1.Type == op3.Type);
        }

        private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
        {
            Debug.Assert(op1.Type == op2.Type);
            Debug.Assert(op1.Type == op3.Type);
            Debug.Assert(op1.Type == op4.Type);
        }
    }
}
// ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Diagnostics;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Emits Arm64 instructions for IR intrinsic operations. The base encoding of each intrinsic
    /// comes from <see cref="IntrinsicTable"/>; the helpers below OR the variable fields
    /// (Q, size, element indices, shift immediates) into it before handing it to the assembler.
    /// </summary>
    static class CodeGeneratorIntrinsic
    {
        /// <summary>
        /// Generates the instruction for a single intrinsic operation.
        /// </summary>
        /// <param name="context">Code generation context whose assembler receives the instruction</param>
        /// <param name="operation">Intrinsic operation to be emitted</param>
        /// <exception cref="NotImplementedException">The intrinsic type has no handler here</exception>
        public static void GenerateOperation(CodeGenContext context, Operation operation)
        {
            Intrinsic intrin = operation.Intrinsic;

            // The table lookup uses the intrinsic with the vector type and vector size bits cleared.
            IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));

            // Fields carried by the intrinsic value itself. Scalar forms ignore q and pass 0 instead.
            uint q = (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift;
            uint sz = (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift;

            uint inst = info.Inst;
            Operand dest = operation.Destination;

            Operand Source(int index) => operation.GetSource(index);

            // Reads a source that must be a constant (element index, shift amount or fraction bits).
            uint Immediate(int index)
            {
                Operand source = operation.GetSource(index);

                Debug.Assert(source.Kind == OperandKind.Constant);

                return (uint)source.AsInt32();
            }

            switch (info.Type)
            {
                // Scalar forms.
                case IntrinsicType.ScalarUnary:
                    GenerateVectorUnary(context, 0, sz, inst, dest, Source(0));
                    break;
                case IntrinsicType.ScalarUnaryByElem:
                    GenerateVectorUnaryByElem(context, 0, sz, inst, Immediate(1), dest, Source(0));
                    break;
                case IntrinsicType.ScalarBinary:
                    GenerateVectorBinary(context, 0, sz, inst, dest, Source(0), Source(1));
                    break;
                case IntrinsicType.ScalarBinaryFPByElem:
                    GenerateVectorBinaryFPByElem(context, 0, sz, inst, Immediate(2), dest, Source(0), Source(1));
                    break;
                case IntrinsicType.ScalarBinaryRd:
                    // Source 0 is not encoded; only source 1 is passed as rn.
                    GenerateVectorUnary(context, 0, sz, inst, dest, Source(1));
                    break;
                case IntrinsicType.ScalarBinaryShl:
                    GenerateVectorBinaryShlImm(context, 0, sz, inst, dest, Source(0), Immediate(1));
                    break;
                case IntrinsicType.ScalarBinaryShr:
                    GenerateVectorBinaryShrImm(context, 0, sz, inst, dest, Source(0), Immediate(1));
                    break;
                case IntrinsicType.ScalarFPCompare:
                    GenerateScalarFPCompare(context, sz, inst, dest, Source(0), Source(1));
                    break;
                case IntrinsicType.ScalarFPConvFixed:
                    // The size is biased by 2 before being used as the element size of the immediate.
                    GenerateVectorBinaryShrImm(context, 0, sz + 2u, inst, dest, Source(0), Immediate(1));
                    break;
                case IntrinsicType.ScalarFPConvFixedGpr:
                    GenerateScalarFPConvGpr(context, sz, inst, dest, Source(0), Immediate(1));
                    break;
                case IntrinsicType.ScalarFPConvGpr:
                    GenerateScalarFPConvGpr(context, sz, inst, dest, Source(0));
                    break;
                case IntrinsicType.ScalarTernary:
                    // Source 0 goes last (ra); sources 1 and 2 are rn and rm.
                    GenerateScalarTernary(context, sz, inst, dest, Source(1), Source(2), Source(0));
                    break;
                case IntrinsicType.ScalarTernaryFPRdByElem:
                    GenerateVectorBinaryFPByElem(context, 0, sz, inst, Immediate(3), dest, Source(1), Source(2));
                    break;
                case IntrinsicType.ScalarTernaryShlRd:
                    GenerateVectorBinaryShlImm(context, 0, sz, inst, dest, Source(1), Immediate(2));
                    break;
                case IntrinsicType.ScalarTernaryShrRd:
                    GenerateVectorBinaryShrImm(context, 0, sz, inst, dest, Source(1), Immediate(2));
                    break;

                // Vector forms.
                case IntrinsicType.VectorUnary:
                    GenerateVectorUnary(context, q, sz, inst, dest, Source(0));
                    break;
                case IntrinsicType.VectorUnaryByElem:
                    GenerateVectorUnaryByElem(context, q, sz, inst, Immediate(1), dest, Source(0));
                    break;
                case IntrinsicType.VectorBinary:
                    GenerateVectorBinary(context, q, sz, inst, dest, Source(0), Source(1));
                    break;
                case IntrinsicType.VectorBinaryBitwise:
                    // Bitwise forms do not encode a size field.
                    GenerateVectorBinary(context, q, inst, dest, Source(0), Source(1));
                    break;
                case IntrinsicType.VectorBinaryByElem:
                    GenerateVectorBinaryByElem(context, q, sz, inst, Immediate(2), dest, Source(0), Source(1));
                    break;
                case IntrinsicType.VectorBinaryFPByElem:
                    GenerateVectorBinaryFPByElem(context, q, sz, inst, Immediate(2), dest, Source(0), Source(1));
                    break;
                case IntrinsicType.VectorBinaryRd:
                    GenerateVectorUnary(context, q, sz, inst, dest, Source(1));
                    break;
                case IntrinsicType.VectorBinaryShl:
                    GenerateVectorBinaryShlImm(context, q, sz, inst, dest, Source(0), Immediate(1));
                    break;
                case IntrinsicType.VectorBinaryShr:
                    GenerateVectorBinaryShrImm(context, q, sz, inst, dest, Source(0), Immediate(1));
                    break;
                case IntrinsicType.VectorFPConvFixed:
                    GenerateVectorBinaryShrImm(context, q, sz + 2u, inst, dest, Source(0), Immediate(1));
                    break;
                case IntrinsicType.VectorInsertByElem:
                    // Source 1 is the destination element index, source 3 the source element index,
                    // and source 2 the register the element is read from.
                    GenerateVectorInsertByElem(context, sz, inst, Immediate(3), Immediate(1), dest, Source(2));
                    break;
                case IntrinsicType.VectorLookupTable:
                    // All sources except the last must be consecutive vector registers.
                    Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);

                    for (int i = 1; i < operation.SourcesCount - 1; i++)
                    {
                        Register currReg = operation.GetSource(i).GetRegister();
                        Register prevReg = operation.GetSource(i - 1).GetRegister();

                        Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector);
                    }

                    // (sources count - 2) is encoded starting at bit 13; the last source is passed as rm.
                    GenerateVectorBinary(
                        context,
                        q,
                        inst | ((uint)(operation.SourcesCount - 2) << 13),
                        dest,
                        Source(0),
                        Source(operation.SourcesCount - 1));
                    break;
                case IntrinsicType.VectorTernaryFPRdByElem:
                    GenerateVectorBinaryFPByElem(context, q, sz, inst, Immediate(3), dest, Source(1), Source(2));
                    break;
                case IntrinsicType.VectorTernaryRd:
                    GenerateVectorBinary(context, q, sz, inst, dest, Source(1), Source(2));
                    break;
                case IntrinsicType.VectorTernaryRdBitwise:
                    GenerateVectorBinary(context, q, inst, dest, Source(1), Source(2));
                    break;
                case IntrinsicType.VectorTernaryRdByElem:
                    GenerateVectorBinaryByElem(context, q, sz, inst, Immediate(3), dest, Source(1), Source(2));
                    break;
                case IntrinsicType.VectorTernaryShlRd:
                    GenerateVectorBinaryShlImm(context, q, sz, inst, dest, Source(1), Immediate(2));
                    break;
                case IntrinsicType.VectorTernaryShrRd:
                    GenerateVectorBinaryShrImm(context, q, sz, inst, dest, Source(1), Immediate(2));
                    break;

                // Register access forms.
                case IntrinsicType.GetRegister:
                    context.Assembler.WriteInstruction(inst, dest);
                    break;
                case IntrinsicType.SetRegister:
                    context.Assembler.WriteInstruction(inst, Source(0));
                    break;

                default:
                    throw new NotImplementedException(info.Type.ToString());
            }
        }

        /// <summary>
        /// Emits a scalar floating-point compare followed by an MRS that copies the resulting
        /// system register value into <paramref name="dest"/>.
        /// </summary>
        private static void GenerateScalarFPCompare(
            CodeGenContext context,
            uint sz,
            uint instruction,
            Operand dest,
            Operand rn,
            Operand rm)
        {
            instruction |= (sz << 22);

            if (rm.Kind == OperandKind.Constant && rm.Value == 0)
            {
                // Compare against constant zero: set bit 3 and reuse rn so that a register is still encoded.
                instruction |= 0b1000;
                rm = rn;
            }

            context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm);

            // NOTE(review): (1, 3, 4, 2, 0) presumably selects NZCV - confirm against Assembler.Mrs.
            context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
        }

        /// <summary>
        /// Emits a conversion or move between a general purpose register and a FP/SIMD register.
        /// </summary>
        private static void GenerateScalarFPConvGpr(
            CodeGenContext context,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn)
        {
            instruction |= (sz << 22);

            if (rd.Type.IsInteger())
            {
                context.Assembler.WriteInstructionAuto(instruction, rd, rn);
            }
            else
            {
                // The destination is not an integer, so the SF flag has to come from the integer source.
                if (rn.Type == OperandType.I64)
                {
                    instruction |= Assembler.SfFlag;
                }

                context.Assembler.WriteInstruction(instruction, rd, rn);
            }
        }

        /// <summary>
        /// Emits a fixed-point conversion between a general purpose register and a FP/SIMD register.
        /// </summary>
        /// <param name="fBits">Number of fraction bits, from 1 to 64 (at most 32 for 32-bit integers)</param>
        private static void GenerateScalarFPConvGpr(
            CodeGenContext context,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            uint fBits)
        {
            // The field written at bit 10 holds (64 - fBits). A value of 0 for fBits would produce 64,
            // which does not fit in 6 bits and would spill into bit 16 of the instruction.
            Debug.Assert(fBits >= 1 && fBits <= 64);

            instruction |= (sz << 22);
            instruction |= (64 - fBits) << 10;

            if (rd.Type.IsInteger())
            {
                Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);

                context.Assembler.WriteInstructionAuto(instruction, rd, rn);
            }
            else
            {
                if (rn.Type == OperandType.I64)
                {
                    instruction |= Assembler.SfFlag;
                }
                else
                {
                    Debug.Assert(fBits <= 32);
                }

                context.Assembler.WriteInstruction(instruction, rd, rn);
            }
        }

        /// <summary>
        /// Emits a scalar instruction with three register sources (rn, rm and ra).
        /// </summary>
        private static void GenerateScalarTernary(
            CodeGenContext context,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm,
            Operand ra)
        {
            instruction |= (sz << 22);

            context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra);
        }

        /// <summary>
        /// Emits a one-source instruction with Q at bit 30 and size at bit 22.
        /// </summary>
        private static void GenerateVectorUnary(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn)
        {
            instruction |= (q << 30) | (sz << 22);

            context.Assembler.WriteInstruction(instruction, rd, rn);
        }

        /// <summary>
        /// Emits a one-source instruction that selects an element through the imm5 field (bit 16).
        /// </summary>
        private static void GenerateVectorUnaryByElem(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            uint srcIndex,
            Operand rd,
            Operand rn)
        {
            // imm5 has a single set bit at position sz marking the element size, with the index above it.
            uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz);

            instruction |= (q << 30) | (imm5 << 16);

            context.Assembler.WriteInstruction(instruction, rd, rn);
        }

        /// <summary>
        /// Emits a two-source instruction that has no size field (only Q at bit 30).
        /// </summary>
        private static void GenerateVectorBinary(
            CodeGenContext context,
            uint q,
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm)
        {
            instruction |= (q << 30);

            context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
        }

        /// <summary>
        /// Emits a two-source instruction with Q at bit 30 and size at bit 22.
        /// </summary>
        private static void GenerateVectorBinary(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm)
        {
            instruction |= (q << 30) | (sz << 22);

            context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
        }

        /// <summary>
        /// Emits a two-source integer instruction where the second source is indexed by element.
        /// </summary>
        private static void GenerateVectorBinaryByElem(
            CodeGenContext context,
            uint q,
            uint size,
            uint instruction,
            uint srcIndex,
            Operand rd,
            Operand rn,
            Operand rm)
        {
            instruction |= (q << 30) | (size << 22);

            if (size == 2)
            {
                // 2-bit index: bit 0 goes to instruction bit 21, bit 1 to instruction bit 11.
                instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
            }
            else
            {
                // 3-bit index: bits 0-1 go to instruction bits 20-21, bit 2 to instruction bit 11.
                instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);
            }

            context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
        }

        /// <summary>
        /// Emits a two-source floating-point instruction where the second source is indexed by element.
        /// </summary>
        private static void GenerateVectorBinaryFPByElem(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            uint srcIndex,
            Operand rd,
            Operand rn,
            Operand rm)
        {
            instruction |= (q << 30) | (sz << 22);

            if (sz != 0)
            {
                // 1-bit index, placed at instruction bit 11.
                instruction |= (srcIndex & 1) << 11;
            }
            else
            {
                // 2-bit index: bit 0 goes to instruction bit 21, bit 1 to instruction bit 11.
                instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
            }

            context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
        }

        /// <summary>
        /// Emits a shift left by immediate. The immediate field (bit 16) holds the element size
        /// marker (8 &lt;&lt; sz) combined with the shift amount.
        /// </summary>
        private static void GenerateVectorBinaryShlImm(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            uint shift)
        {
            instruction |= (q << 30);

            // shift is unsigned, so only the upper bound needs to be checked.
            Debug.Assert(shift < (8u << (int)sz));

            uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz)));

            instruction |= (imm << 16);

            context.Assembler.WriteInstruction(instruction, rd, rn);
        }

        /// <summary>
        /// Emits a shift right by immediate. The immediate field (bit 16) holds the element size
        /// marker (8 &lt;&lt; sz) combined with (element size in bits - shift).
        /// </summary>
        private static void GenerateVectorBinaryShrImm(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            uint shift)
        {
            instruction |= (q << 30);

            Debug.Assert(shift > 0 && shift <= (8u << (int)sz));

            uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift);

            instruction |= (imm << 16);

            context.Assembler.WriteInstruction(instruction, rd, rn);
        }

        /// <summary>
        /// Emits an element insert: imm4 (bit 11) selects the source element and
        /// imm5 (bit 16) selects the destination element and element size.
        /// </summary>
        private static void GenerateVectorInsertByElem(
            CodeGenContext context,
            uint sz,
            uint instruction,
            uint srcIndex,
            uint dstIndex,
            Operand rd,
            Operand rn)
        {
            uint imm4 = srcIndex << (int)sz;
            uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz);

            instruction |= imm4 << 11;
            instruction |= imm5 << 16;

            context.Assembler.WriteInstruction(instruction, rd, rn);
        }
    }
}
// ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Immutable pairing of an instruction encoding with the <see cref="IntrinsicType"/>
    /// that tells the code generator how its operands are to be encoded.
    /// </summary>
    readonly struct IntrinsicInfo
    {
        /// <summary>Instruction encoding value for the intrinsic.</summary>
        public uint Inst { get; }

        /// <summary>Kind of intrinsic, used to pick the encoding routine.</summary>
        public IntrinsicType Type { get; }

        /// <summary>
        /// Creates a new intrinsic table entry.
        /// </summary>
        /// <param name="inst">Instruction encoding value</param>
        /// <param name="type">Kind of intrinsic</param>
        public IntrinsicInfo(uint inst, IntrinsicType type)
        {
            Inst = inst;
            Type = type;
        }
    }
}
\ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs new file mode 100644 index 00000000..53ef152e --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs @@ -0,0 +1,461 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class IntrinsicTable + { + private static IntrinsicInfo[] _intrinTable; + + static IntrinsicTable() + { + _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))]; + + Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm)); + Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, 
IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, 
IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem)); + Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt)); + Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, 
IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond)); + Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond)); + Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare)); + Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare)); + Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel)); + Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, 
IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtpsGp, new 
IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv)); + Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmaddS, new 
IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlaVe, new 
IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi)); + Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi)); + Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem)); + Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem)); + Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem)); + Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem)); + Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, 
IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, 
IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem)); + Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem)); + Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, 
IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi)); + Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister)); + Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister)); + Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni)); + Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise)); + Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm)); + Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise)); + Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64RshrnV, new 
IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, 
IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd)); + Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd)); + Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd)); + 
Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary)); + 
Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary)); + 
Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SshlS, new 
IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable)); + Add(Intrinsic.Arm64TbxV, new 
IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable)); + Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary)); + 
Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqshrnS, new 
IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd)); + 
Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
+            Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
+            Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
+            Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
+            Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
+        }
+
+        private static void Add(Intrinsic intrin, IntrinsicInfo info) // Registers `info` as the table entry for `intrin`.
+        { // The slot is the enum's integer value; a later call for the same intrinsic overwrites the earlier entry.
+            _intrinTable[(int)intrin] = info;
+        }
+
+        public static IntrinsicInfo GetInfo(Intrinsic intrin) // Returns the table entry stored at the integer value of `intrin`.
+        { // No registration check is made: an intrinsic never passed to Add yields whatever _intrinTable initially holds there (its declaration is outside this view).
+            return _intrinTable[(int)intrin];
+        }
+    }
+}
\ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs new file mode 100644 index 00000000..800eca93 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs @@ -0,0 +1,59 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+    enum IntrinsicType // Category tag paired with each intrinsic's 32-bit constant through `new IntrinsicInfo(value, IntrinsicType.X)` in IntrinsicTable.
+    { // Members take implicit values 0..49 in declaration order. Presumably the code generator selects an emitter per member - TODO confirm against the consumer, which is not in this file.
+        ScalarUnary, // Scalar* group. IntrinsicTable uses this one for e.g. Arm64FnegS and Arm64FsqrtS.
+        ScalarUnaryByElem, // Used for Arm64InsGp.
+        ScalarBinary, // Used for e.g. Arm64FmulS and Arm64SqaddS.
+        ScalarBinaryByElem, // Used for e.g. Arm64SqdmulhSe.
+        ScalarBinaryFPByElem, // Used for Arm64FmulSe and Arm64FmulxSe.
+        ScalarBinaryRd, // Used for e.g. Arm64SqxtnS and Arm64SuqaddS.
+        ScalarBinaryShl, // Used for e.g. Arm64ShlS.
+        ScalarBinaryShr, // Used for e.g. Arm64SshrS.
+        ScalarFcsel,
+        ScalarFmovi, // Used for Arm64FmovSi.
+        ScalarFPCompare,
+        ScalarFPCompareCond,
+        ScalarFPConv,
+        ScalarFPConvFixed, // Used for Arm64ScvtfSFixed and Arm64UcvtfSFixed.
+        ScalarFPConvFixedGpr, // Used for Arm64ScvtfGpFixed and Arm64UcvtfGpFixed.
+        ScalarFPConvGpr, // Used for e.g. Arm64FmovGp and Arm64ScvtfGp.
+        ScalarTernary, // Used for e.g. Arm64FmsubS.
+        ScalarTernaryFPRdByElem, // Used for Arm64FmlaSe and Arm64FmlsSe.
+        ScalarTernaryShlRd, // Used for Arm64SliS.
+        ScalarTernaryShrRd, // Used for e.g. Arm64SriS and Arm64SsraS.
+
+        VectorUnary, // Vector* group. Used for e.g. Arm64FnegV.
+        VectorUnaryBitwise, // Used for Arm64NotV and Arm64RbitV.
+        VectorUnaryByElem, // Used for Arm64SmovV and Arm64UmovV.
+        VectorBinary, // Used for e.g. Arm64FmulV.
+        VectorBinaryBitwise, // Used for e.g. Arm64OrrV.
+        VectorBinaryBitwiseImm, // Used for Arm64OrrVi.
+        VectorBinaryByElem, // Used for e.g. Arm64MulVe.
+        VectorBinaryFPByElem, // Used for Arm64FmulVe and Arm64FmulxVe.
+        VectorBinaryRd, // Used for e.g. Arm64SqxtnV.
+        VectorBinaryShl, // Used for e.g. Arm64ShlV.
+        VectorBinaryShr, // Used for e.g. Arm64SshrV.
+        VectorExt,
+        VectorFmovi, // Used for Arm64FmovVi.
+        VectorFPConvFixed, // Used for Arm64ScvtfVFixed and Arm64UcvtfVFixed.
+        VectorInsertByElem, // Used for Arm64InsVe.
+        VectorLdSt, // Used for the Arm64Ld*rV, Arm64Ld*Vms and Arm64St*Vms entries.
+        VectorLdStSs, // Used for the Arm64Ld*Vss and Arm64St*Vss entries.
+        VectorLookupTable, // Used for Arm64TblV and Arm64TbxV.
+        VectorMovi, // Used for Arm64MoviV.
+        VectorMvni, // Used for Arm64MvniV.
+        VectorTernaryFPRdByElem, // Used for Arm64FmlaVe and Arm64FmlsVe.
+        VectorTernaryRd, // Used for e.g. Arm64MlaV.
+        VectorTernaryRdBitwise,
+        VectorTernaryRdByElem, // Used for e.g. Arm64MlaVe.
+        VectorTernaryShlRd, // Used for Arm64SliV.
+        VectorTernaryShrRd, // Used for e.g. Arm64SriV.
+
+        Vector128Unary, // Used for e.g. Arm64Sha1hV and Arm64Sha256su0V.
+        Vector128Binary, // Used for e.g. Arm64Sha1cV and Arm64Sha256hV.
+
+        GetRegister, // Used for e.g. Arm64MrsFpsr.
+        SetRegister // Used for e.g. Arm64MsrFpsr.
+    }
+}
\ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/ARMeilleure/CodeGen/Arm64/PreAllocator.cs new file mode 100644 index 00000000..a7f07394 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/PreAllocator.cs @@ -0,0 +1,940 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    class PreAllocator // IR rewriting pass: RunPass walks every operation and applies the copies and call/return handling implemented by the Handle* helpers.
+    {
+        private class ConstantDict // Maps a (raw constant value, operand type) pair to the operand registered for it.
+        {
+            private readonly Dictionary<(ulong, OperandType), Operand> _constants;
+
+            public ConstantDict()
+            {
+                _constants = new Dictionary<(ulong, OperandType), Operand>();
+            }
+
+            public void Add(ulong value, OperandType type, Operand local) // Registers `local` for (value, type); Dictionary.Add throws ArgumentException when the pair is already present.
+            {
+                _constants.Add((value, type), local);
+            }
+
+            public bool TryGetValue(ulong value, OperandType type, out Operand local) // Returns true and sets `local` when (value, type) was registered; false otherwise.
+            {
+                return _constants.TryGetValue((value, type), out local);
+            }
+        }
+
+        public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs) // Applies the Handle* rewrites to every non-Phi operation of cctx.Cfg.
+        { // maxCallArgs receives the largest argument count computed for an Instruction.Call, or -1 when no Call was visited.
+            maxCallArgs = -1;
+
+            Span<Operation> buffer = default; // Starts empty; handed by ref to HandleLoadArgument.
+
+            Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()]; // Allocated once for the whole pass and passed to every HandleLoadArgument call.
+
+            for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+            {
+                ConstantDict constants = new ConstantDict(); // Fresh per block, so registered constants are never shared across blocks.
+
+                Operation nextNode;
+
+                for (Operation node = block.Operations.First; node != default; node = nextNode)
+                {
+                    nextNode = node.ListNext; // Captured before the handlers run, so operations they insert after `node` are not visited by this loop.
+
+                    if (node.Instruction == Instruction.Phi)
+                    {
+                        continue; // Phi operations are left untouched.
+                    }
+
+                    HandleConstantRegCopy(constants, block.Operations, node);
+                    HandleDestructiveRegCopy(block.Operations, node);
+
+                    switch (node.Instruction)
+                    {
+                        case Instruction.Call:
+                            // Get the maximum number of arguments used on a call.
+                            // On windows, when a struct is returned from the call,
+                            // we also need to pass the pointer where the struct
+                            // should be written on the first argument.
+                            int argsCount = node.SourcesCount - 1; // Source 0 is not counted as an argument.
+
+                            if (node.Destination != default && node.Destination.Type == OperandType.V128)
+                            {
+                                argsCount++; // NOTE(review): the comment above refers to Windows, but this is the Arm64 backend - confirm the extra slot is still required here.
+                            }
+
+                            if (maxCallArgs < argsCount)
+                            {
+                                maxCallArgs = argsCount;
+                            }
+
+                            // Copy values to registers expected by the function
+                            // being called, as mandated by the ABI.
+                            HandleCall(constants, block.Operations, node);
+                            break;
+                        case Instruction.CompareAndSwap:
+                        case Instruction.CompareAndSwap16:
+                        case Instruction.CompareAndSwap8:
+                            nextNode = HandleCompareAndSwap(block.Operations, node); // The handler decides which operation the loop continues with.
+                            break;
+                        case Instruction.LoadArgument:
+                            nextNode = HandleLoadArgument(cctx, ref buffer, block.Operations, preservedArgs, node); // Same: iteration resumes at the returned operation.
+                            break;
+                        case Instruction.Return:
+                            HandleReturn(block.Operations, node);
+                            break;
+                        case Instruction.Tailcall:
+                            HandleTailcall(constants, block.Operations, stackAlloc, node, node); // NOTE(review): `node` is passed for both trailing parameters; HandleTailcall's signature is outside this view - confirm that is intended.
+                            break;
+                    }
+                }
+            }
+        }
+
+        private static void HandleConstantRegCopy(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node) // Replaces constant sources of `node` that are not kept as immediates with the operand returned by AddIntConstantCopy / AddFloatConstantCopy.
+        { // Both Add*ConstantCopy helpers and the HasConstSrc1 / HasConstSrc2 / IsCommutative predicates are defined outside this view.
+            if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
+            {
+                return; // Nothing to rewrite, or the operation is one IsIntrinsicWithConst accepts and is skipped entirely.
+            }
+
+            Instruction inst = node.Instruction;
+
+            Operand src1 = node.GetSource(0);
+            Operand src2;
+
+            if (src1.Kind == OperandKind.Constant)
+            {
+                if (!src1.Type.IsInteger())
+                {
+                    // Handle non-integer types (FP32, FP64 and V128).
+                    // For instructions without an immediate operand, we do the following:
+                    // - Insert a copy with the constant value (as integer) to a GPR.
+                    // - Insert a copy from the GPR to a XMM register.
+                    // - Replace the constant use with the XMM register.
+                    src1 = AddFloatConstantCopy(constants, nodes, node, src1);
+
+                    node.SetSource(0, src1);
+                }
+                else if (!HasConstSrc1(node, src1.Value))
+                {
+                    // Handle integer types.
+                    // Most ALU instructions accepts a 32-bits immediate on the second operand.
+                    // We need to ensure the following:
+                    // - If the constant is on operand 1, we need to move it.
+                    // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+                    // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+                    // - If the constant is on operand 2, we check if the instruction supports it,
+                    // if not, we also add a copy. 64-bits constants are usually not supported.
+                    if (IsCommutative(node))
+                    {
+                        src2 = node.GetSource(1);
+
+                        Operand temp = src1;
+
+                        src1 = src2;
+                        src2 = temp;
+
+                        node.SetSource(0, src1);
+                        node.SetSource(1, src2);
+                    }
+
+                    if (src1.Kind == OperandKind.Constant) // Still a constant: either no swap happened or both sources were constants.
+                    {
+                        src1 = AddIntConstantCopy(constants, nodes, node, src1);
+
+                        node.SetSource(0, src1);
+                    }
+                }
+            }
+
+            if (node.SourcesCount < 2)
+            {
+                return;
+            }
+
+            src2 = node.GetSource(1); // Re-read from the node, so a swap performed above is taken into account.
+
+            if (src2.Kind == OperandKind.Constant)
+            {
+                if (!src2.Type.IsInteger())
+                {
+                    src2 = AddFloatConstantCopy(constants, nodes, node, src2);
+
+                    node.SetSource(1, src2);
+                }
+                else if (!HasConstSrc2(inst, src2))
+                {
+                    src2 = AddIntConstantCopy(constants, nodes, node, src2);
+
+                    node.SetSource(1, src2);
+                }
+            }
+
+            if (node.SourcesCount < 3 ||
+                node.Instruction == Instruction.BranchIf ||
+                node.Instruction == Instruction.Compare ||
+                node.Instruction == Instruction.VectorInsert ||
+                node.Instruction == Instruction.VectorInsert16 ||
+                node.Instruction == Instruction.VectorInsert8)
+            {
+                return; // For these instructions, sources from index 2 onwards are left as they are.
+            }
+
+            for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++) // Every remaining constant source gets a copy; no immediate-support check is made from index 2 on.
+            {
+                Operand src = node.GetSource(srcIndex);
+
+                if (src.Kind == OperandKind.Constant)
+                {
+                    if (!src.Type.IsInteger())
+                    {
+                        src = AddFloatConstantCopy(constants, nodes, node, src);
+
+                        node.SetSource(srcIndex, src);
+                    }
+                    else
+                    {
+                        src = AddIntConstantCopy(constants, nodes, node, src);
+
+                        node.SetSource(srcIndex, src);
+                    }
+                }
+            }
+        }
+
+        private static void HandleDestructiveRegCopy(IntrusiveList<Operation> nodes, Operation node) // When IsSameOperandDestSrc1(node) holds and source 0 is a local, inserts Copy operations so destination and source 0 are the same operand.
+        { // IsSameOperandDestSrc1 is defined outside this view.
+            if (node.Destination == default || node.SourcesCount == 0)
+            {
+                return;
+            }
+
+            Operand dest = node.Destination;
+            Operand src1 = node.GetSource(0);
+
+            if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable)
+            {
+                bool useNewLocal = false;
+
+                for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++) // Is `dest` also read through any source other than source 0?
+                {
+                    if (node.GetSource(srcIndex) == dest)
+                    {
+                        useNewLocal = true;
+
+                        break;
+                    }
+                }
+
+                if (useNewLocal)
+                {
+                    // Dest is being used as some source already, we need to use a new
+                    // local to store the temporary value, otherwise the value on dest
+                    // local would be overwritten.
+                    Operand temp = Local(dest.Type);
+
+                    nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+                    node.SetSource(0, temp);
+
+                    nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp)); // The result is moved into the original destination right after `node`.
+
+                    node.Destination = temp; // `node` now both reads and writes the temporary.
+                }
+                else
+                {
+                    nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1)); // dest is not read by the other sources, so it can receive src1 up front.
+
+                    node.SetSource(0, dest);
+                }
+            }
+        }
+
+        private static void HandleCall(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+        {
+            Operation operation = node;
+
+            Operand dest = operation.Destination;
+
+            List<Operand> sources = new List<Operand>
+            {
+                operation.GetSource(0)
+            };
+
+            int argsCount = operation.SourcesCount - 1;
+
+            int intMax = CallingConvention.GetArgumentsOnRegsCount();
+            int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+            int intCount = 0;
+            int vecCount = 0;
+
+            int stackOffset = 0;
+
+            for (int index = 0; index < argsCount; index++)
+            {
+                Operand source = operation.GetSource(index + 1);
+
+                bool passOnReg;
+
+                if (source.Type.IsInteger())
+                {
+                    passOnReg = intCount < intMax;
+                }
+                else if (source.Type == OperandType.V128)
+                {
+                    passOnReg = intCount + 1 < intMax;
+                }
+                else
+                {
+                    passOnReg = vecCount < vecMax;
+                }
+
+                if (source.Type == OperandType.V128 && passOnReg)
+                {
+                    // V128 is a struct, we pass each half on a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp)); + + sources.Add(argReg); + } + else + { + Operand offset = Const(stackOffset); + + Operation spillOp = Operation(Instruction.SpillArg, default, offset, source); + + HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, spillOp)); + + stackOffset += source.Type.GetSizeInBytes(); + } + } + + if (dest != default) + { + if (dest.Type == OperandType.V128) + { + Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64); + + node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg)); + nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1))); + + operation.Destination = default; + } + else + { + Operand retReg = dest.Type.IsInteger() + ? 
Gpr(CallingConvention.GetIntReturnRegister(), dest.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, dest, retReg); + + nodes.AddAfter(node, copyOp); + + operation.Destination = retReg; + } + } + + operation.SetSources(sources.ToArray()); + } + + private static void HandleTailcall( + ConstantDict constants, + IntrusiveList<Operation> nodes, + StackAllocator stackAlloc, + Operation node, + Operation operation) + { + List<Operand> sources = new List<Operand> + { + operation.GetSource(0) + }; + + int argsCount = operation.SourcesCount - 1; + + int intMax = CallingConvention.GetArgumentsOnRegsCount(); + int vecMax = CallingConvention.GetArgumentsOnRegsCount(); + + int intCount = 0; + int vecCount = 0; + + // Handle arguments passed on registers. + for (int index = 0; index < argsCount; index++) + { + Operand source = operation.GetSource(1 + index); + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount + 1 < intMax; + } + else + { + passOnReg = vecCount < vecMax; + } + + if (source.Type == OperandType.V128 && passOnReg) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? 
Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp)); + + sources.Add(argReg); + } + else + { + throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)"); + } + } + + // The target address must be on the return registers, since we + // don't return anything and it is guaranteed to not be a + // callee saved register (which would be trashed on the epilogue). + Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64); + + Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0)); + + nodes.AddBefore(node, addrCopyOp); + + sources[0] = tcAddress; + + operation.SetSources(sources.ToArray()); + } + + private static Operation HandleCompareAndSwap(IntrusiveList<Operation> nodes, Operation node) + { + Operand expected = node.GetSource(1); + + if (expected.Type == OperandType.V128) + { + Operand dest = node.Destination; + Operand expectedLow = Local(OperandType.I64); + Operand expectedHigh = Local(OperandType.I64); + Operand desiredLow = Local(OperandType.I64); + Operand desiredHigh = Local(OperandType.I64); + Operand actualLow = Local(OperandType.I64); + Operand actualHigh = Local(OperandType.I64); + + Operand address = node.GetSource(0); + Operand desired = node.GetSource(2); + + void SplitOperand(Operand source, Operand low, Operand high) + { + nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1))); + } + + SplitOperand(expected, expectedLow, expectedHigh); + SplitOperand(desired, desiredLow, desiredHigh); + + Operation operation = node; + + // Update the sources and destinations with split 64-bit halfs of the whole 128-bit 
values. + // We also need a additional registers that will be used to store temporary information. + operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) }); + operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh }); + + // Add some dummy uses of the input operands, as the CAS operation will be a loop, + // so they can't be used as destination operand. + for (int i = 0; i < operation.SourcesCount; i++) + { + Operand src = operation.GetSource(i); + node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src)); + } + + // Assemble the vector with the 64-bit values at the given memory location. + node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow)); + node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1))); + } + else + { + // We need a additional register where the store result will be written to. + node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) }); + + // Add some dummy uses of the input operands, as the CAS operation will be a loop, + // so they can't be used as destination operand. 
+ Operation operation = node; + + for (int i = 0; i < operation.SourcesCount; i++) + { + Operand src = operation.GetSource(i); + node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src)); + } + } + + return node.ListNext; + } + + private static void HandleReturn(IntrusiveList<Operation> nodes, Operation node) + { + if (node.SourcesCount == 0) + { + return; + } + + Operand source = node.GetSource(0); + + if (source.Type == OperandType.V128) + { + Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1))); + } + else + { + Operand retReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), source.Type); + + Operation retCopyOp = Operation(Instruction.Copy, retReg, source); + + nodes.AddBefore(node, retCopyOp); + } + } + + private static Operation HandleLoadArgument( + CompilerContext cctx, + ref Span<Operation> buffer, + IntrusiveList<Operation> nodes, + Operand[] preservedArgs, + Operation node) + { + Operand source = node.GetSource(0); + + Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind."); + + int index = source.AsInt32(); + + int intCount = 0; + int vecCount = 0; + + for (int cIndex = 0; cIndex < index; cIndex++) + { + OperandType argType = cctx.FuncArgTypes[cIndex]; + + if (argType.IsInteger()) + { + intCount++; + } + else if (argType == OperandType.V128) + { + intCount += 2; + } + else + { + vecCount++; + } + } + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount(); + } + else if (source.Type == OperandType.V128) + { + passOnReg = intCount + 1 < 
CallingConvention.GetArgumentsOnRegsCount(); + } + else + { + passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount(); + } + + if (passOnReg) + { + Operand dest = node.Destination; + + if (preservedArgs[index] == default) + { + if (dest.Type == OperandType.V128) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand pArg = Local(OperandType.V128); + + Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64); + Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64); + + Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg); + Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1)); + + cctx.Cfg.Entry.Operations.AddFirst(copyH); + cctx.Cfg.Entry.Operations.AddFirst(copyL); + + preservedArgs[index] = pArg; + } + else + { + Operand pArg = Local(dest.Type); + + Operand argReg = dest.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, pArg, argReg); + + cctx.Cfg.Entry.Operations.AddFirst(copyOp); + + preservedArgs[index] = pArg; + } + } + + Operation nextNode; + + if (dest.AssignmentsCount == 1) + { + // Let's propagate the argument if we can to avoid copies. + Propagate(ref buffer, dest, preservedArgs[index]); + nextNode = node.ListNext; + } + else + { + Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]); + nextNode = nodes.AddBefore(node, argCopyOp); + } + + Delete(nodes, node); + return nextNode; + } + else + { + // TODO: Pass on stack. 
+ return node; + } + } + + private static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value) + { + ReadOnlySpan<Operation> uses = dest.GetUses(ref buffer); + + foreach (Operation use in uses) + { + for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++) + { + Operand useSrc = use.GetSource(srcIndex); + + if (useSrc == dest) + { + use.SetSource(srcIndex, value); + } + else if (useSrc.Kind == OperandKind.Memory) + { + MemoryOperand memoryOp = useSrc.GetMemory(); + + Operand baseAddr = memoryOp.BaseAddress; + Operand index = memoryOp.Index; + bool changed = false; + + if (baseAddr == dest) + { + baseAddr = value; + changed = true; + } + + if (index == dest) + { + index = value; + changed = true; + } + + if (changed) + { + use.SetSource(srcIndex, MemoryOp( + useSrc.Type, + baseAddr, + index, + memoryOp.Scale, + memoryOp.Displacement)); + } + } + } + } + } + + private static Operand AddFloatConstantCopy( + ConstantDict constants, + IntrusiveList<Operation> nodes, + Operation node, + Operand source) + { + Operand temp = Local(source.Type); + + Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source)); + + Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst); + + nodes.AddBefore(node, copyOp); + + return temp; + } + + private static Operand AddIntConstantCopy( + ConstantDict constants, + IntrusiveList<Operation> nodes, + Operation node, + Operand source) + { + if (constants.TryGetValue(source.Value, source.Type, out Operand temp)) + { + return temp; + } + + temp = Local(source.Type); + + Operation copyOp = Operation(Instruction.Copy, temp, source); + + nodes.AddBefore(node, copyOp); + + constants.Add(source.Value, source.Type, temp); + + return temp; + } + + private static Operand GetIntConst(Operand value) + { + if (value.Type == OperandType.FP32) + { + return Const(value.AsInt32()); + } + else if (value.Type == OperandType.FP64) + { + return Const(value.AsInt64()); + } + + return value; + } + 
+ private static void Delete(IntrusiveList<Operation> nodes, Operation node) + { + node.Destination = default; + + for (int index = 0; index < node.SourcesCount; index++) + { + node.SetSource(index, default); + } + + nodes.Remove(node); + } + + private static Operand Gpr(int register, OperandType type) + { + return Register(register, RegisterType.Integer, type); + } + + private static Operand Xmm(int register, OperandType type) + { + return Register(register, RegisterType.Vector, type); + } + + private static bool IsSameOperandDestSrc1(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Extended: + return IsSameOperandDestSrc1(operation.Intrinsic); + case Instruction.VectorInsert: + case Instruction.VectorInsert16: + case Instruction.VectorInsert8: + return true; + } + + return false; + } + + private static bool IsSameOperandDestSrc1(Intrinsic intrinsic) + { + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + return info.Type == IntrinsicType.ScalarBinaryRd || + info.Type == IntrinsicType.ScalarTernaryFPRdByElem || + info.Type == IntrinsicType.ScalarTernaryShlRd || + info.Type == IntrinsicType.ScalarTernaryShrRd || + info.Type == IntrinsicType.VectorBinaryRd || + info.Type == IntrinsicType.VectorInsertByElem || + info.Type == IntrinsicType.VectorTernaryRd || + info.Type == IntrinsicType.VectorTernaryRdBitwise || + info.Type == IntrinsicType.VectorTernaryFPRdByElem || + info.Type == IntrinsicType.VectorTernaryRdByElem || + info.Type == IntrinsicType.VectorTernaryShlRd || + info.Type == IntrinsicType.VectorTernaryShrRd; + } + + private static bool HasConstSrc1(Operation node, ulong value) + { + switch (node.Instruction) + { + case Instruction.Add: + case Instruction.BranchIf: + case Instruction.Compare: + case Instruction.Subtract: + // The immediate encoding of those instructions does not allow Rn to be + // XZR (it will be SP instead), so we can't allow a Rn constant in 
this case. + return value == 0 && NotConstOrConst0(node.GetSource(1)); + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseNot: + case Instruction.BitwiseOr: + case Instruction.ByteSwap: + case Instruction.CountLeadingZeros: + case Instruction.Multiply: + case Instruction.Negate: + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + return value == 0; + case Instruction.Copy: + case Instruction.LoadArgument: + case Instruction.Spill: + case Instruction.SpillArg: + return true; + case Instruction.Extended: + return value == 0; + } + + return false; + } + + private static bool NotConstOrConst0(Operand operand) + { + return operand.Kind != OperandKind.Constant || operand.Value == 0; + } + + private static bool HasConstSrc2(Instruction inst, Operand operand) + { + ulong value = operand.Value; + + switch (inst) + { + case Instruction.Add: + case Instruction.BranchIf: + case Instruction.Compare: + case Instruction.Subtract: + return ConstFitsOnUImm12Sh(value); + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _); + case Instruction.Multiply: + case Instruction.Store: + case Instruction.Store16: + case Instruction.Store8: + return value == 0; + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + case Instruction.VectorExtract: + case Instruction.VectorExtract16: + case Instruction.VectorExtract8: + return true; + case Instruction.Extended: + // TODO: Check if actual intrinsic is supposed to have consts here? + // Right now we only hit this case for fixed-point int <-> FP conversion instructions. 
+ return true; + } + + return false; + } + + private static bool IsCommutative(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Add: + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + case Instruction.Multiply: + return true; + + case Instruction.BranchIf: + case Instruction.Compare: + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var compType = (Comparison)comp.AsInt32(); + + return compType == Comparison.Equal || compType == Comparison.NotEqual; + } + } + + return false; + } + + private static bool ConstFitsOnUImm12Sh(ulong value) + { + return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0; + } + + private static bool IsIntrinsicWithConst(Operation operation) + { + bool isIntrinsic = IsIntrinsic(operation.Instruction); + + if (isIntrinsic) + { + Intrinsic intrinsic = operation.Intrinsic; + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + // Those have integer inputs that don't support consts. 
+ return info.Type != IntrinsicType.ScalarFPConvGpr && + info.Type != IntrinsicType.ScalarFPConvFixedGpr && + info.Type != IntrinsicType.SetRegister; + } + + return false; + } + + private static bool IsIntrinsic(Instruction inst) + { + return inst == Instruction.Extended; + } + } +} diff --git a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs index 0423c255..c5a22a53 100644 --- a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs +++ b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs @@ -90,6 +90,47 @@ namespace ARMeilleure.CodeGen.Optimizations } break; + case Instruction.Compare: + if (type == OperandType.I32 && + operation.GetSource(0).Type == type && + operation.GetSource(1).Type == type) + { + switch ((Comparison)operation.GetSource(2).Value) + { + case Comparison.Equal: + EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0); + break; + case Comparison.NotEqual: + EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0); + break; + case Comparison.Greater: + EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0); + break; + case Comparison.LessOrEqual: + EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0); + break; + case Comparison.GreaterUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0); + break; + case Comparison.LessOrEqualUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0); + break; + case Comparison.GreaterOrEqual: + EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0); + break; + case Comparison.Less: + EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0); + break; + case Comparison.GreaterOrEqualUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0); + break; + case Comparison.LessUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 
1 : 0); + break; + } + } + break; + case Instruction.Copy: if (type == OperandType.I32) { diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs index 919e996b..a45bb455 100644 --- a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs +++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs @@ -44,8 +44,8 @@ namespace ARMeilleure.CodeGen.Optimizations ConstantFolding.RunPass(node); Simplification.RunPass(node); - if (DestIsLocalVar(node)) - { + if (DestIsSingleLocalVar(node)) + { if (IsPropagableCompare(node)) { modified |= PropagateCompare(ref buffer, node); @@ -99,20 +99,6 @@ namespace ARMeilleure.CodeGen.Optimizations while (modified); } - private static Span<Operation> GetUses(ref Span<Operation> buffer, Operand operand) - { - ReadOnlySpan<Operation> uses = operand.Uses; - - if (buffer.Length < uses.Length) - { - buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length); - } - - uses.CopyTo(buffer); - - return buffer.Slice(0, uses.Length); - } - private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp) { // Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form @@ -160,7 +146,7 @@ namespace ARMeilleure.CodeGen.Optimizations Comparison compType = (Comparison)comp.AsInt32(); - Span<Operation> uses = GetUses(ref buffer, dest); + Span<Operation> uses = dest.GetUses(ref buffer); foreach (Operation use in uses) { @@ -199,7 +185,7 @@ namespace ARMeilleure.CodeGen.Optimizations Operand dest = copyOp.Destination; Operand source = copyOp.GetSource(0); - Span<Operation> uses = GetUses(ref buffer, dest); + Span<Operation> uses = dest.GetUses(ref buffer); foreach (Operation use in uses) { @@ -231,12 +217,12 @@ namespace ARMeilleure.CodeGen.Optimizations private static bool IsUnused(Operation node) { - return DestIsLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node); + return DestIsSingleLocalVar(node) && 
node.Destination.UsesCount == 0 && !HasSideEffects(node); } - private static bool DestIsLocalVar(Operation node) + private static bool DestIsSingleLocalVar(Operation node) { - return node.Destination != default && node.Destination.Kind == OperandKind.LocalVariable; + return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable; } private static bool HasSideEffects(Operation node) diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs index d8a40365..6ea62c28 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs @@ -17,8 +17,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private const int InstructionGap = 2; private const int InstructionGapMask = InstructionGap - 1; - private const int RegistersCount = 16; - private HashSet<int> _blockEdges; private LiveRange[] _blockRanges; private BitMap[] _blockLiveIn; @@ -59,7 +57,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators void PopulateFreePositions(RegisterType type, out int[] positions, out int count) { - positions = new int[RegistersCount]; + positions = new int[masks.RegistersCount]; count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type)); int mask = masks.GetAvailableRegisters(type); @@ -115,7 +113,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators StackAllocator stackAlloc, RegisterMasks regMasks) { - NumberLocals(cfg); + NumberLocals(cfg, regMasks.RegistersCount); var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count); @@ -134,22 +132,25 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { context.Active.Set(index); - if (current.Register.Type == RegisterType.Integer) - { - context.IntUsedRegisters |= 1 << current.Register.Index; - } - else /* if (interval.Register.Type == RegisterType.Vector) */ + if (current.IsFixedAndUsed) { - context.VecUsedRegisters |= 1 << 
current.Register.Index; + if (current.Register.Type == RegisterType.Integer) + { + context.IntUsedRegisters |= 1 << current.Register.Index; + } + else /* if (interval.Register.Type == RegisterType.Vector) */ + { + context.VecUsedRegisters |= 1 << current.Register.Index; + } } continue; } - AllocateInterval(context, current, index); + AllocateInterval(context, current, index, regMasks.RegistersCount); } - for (int index = RegistersCount * 2; index < _intervals.Count; index++) + for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++) { if (!_intervals[index].IsSpilled) { @@ -163,7 +164,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize); } - private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex) + private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount) { // Check active intervals that already ended. 
foreach (int iIndex in context.Active) @@ -199,17 +200,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - if (!TryAllocateRegWithoutSpill(context, current, cIndex)) + if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount)) { - AllocateRegWithSpill(context, current, cIndex); + AllocateRegWithSpill(context, current, cIndex, registersCount); } } - private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex) + private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount) { RegisterType regType = current.Local.Type.ToRegisterType(); - Span<int> freePositions = stackalloc int[RegistersCount]; + Span<int> freePositions = stackalloc int[registersCount]; context.GetFreePositions(regType, freePositions, out int freePositionsCount); @@ -278,7 +279,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); - InsertInterval(splitChild); + InsertInterval(splitChild, registersCount); } else { @@ -302,12 +303,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return true; } - private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex) + private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount) { RegisterType regType = current.Local.Type.ToRegisterType(); - Span<int> usePositions = stackalloc int[RegistersCount]; - Span<int> blockedPositions = stackalloc int[RegistersCount]; + Span<int> usePositions = stackalloc int[registersCount]; + Span<int> blockedPositions = stackalloc int[registersCount]; context.GetFreePositions(regType, usePositions, out _); context.GetFreePositions(regType, blockedPositions, out _); @@ -386,7 +387,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an 
invalid start position."); - InsertInterval(splitChild); + InsertInterval(splitChild, registersCount); Spill(context, current); } @@ -396,7 +397,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators // so we only need to split the intervals using the selected register. current.Register = new Register(selectedReg, regType); - SplitAndSpillOverlappingIntervals(context, current); + SplitAndSpillOverlappingIntervals(context, current, registersCount); context.Active.Set(cIndex); } @@ -417,14 +418,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); - InsertInterval(splitChild); + InsertInterval(splitChild, registersCount); } else { Spill(context, splitChild); } - SplitAndSpillOverlappingIntervals(context, current); + SplitAndSpillOverlappingIntervals(context, current, registersCount); context.Active.Set(cIndex); } @@ -460,7 +461,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return selected; } - private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current) + private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount) { foreach (int iIndex in context.Active) { @@ -468,7 +469,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (!interval.IsFixed && interval.Register == current.Register) { - SplitAndSpillOverlappingInterval(context, current, interval); + SplitAndSpillOverlappingInterval(context, current, interval, registersCount); context.Active.Clear(iIndex); } @@ -480,7 +481,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current)) { - SplitAndSpillOverlappingInterval(context, current, interval); + SplitAndSpillOverlappingInterval(context, current, interval, registersCount); context.Inactive.Clear(iIndex); } @@ -490,7 +491,8 @@ namespace 
ARMeilleure.CodeGen.RegisterAllocators private void SplitAndSpillOverlappingInterval( AllocationContext context, LiveInterval current, - LiveInterval interval) + LiveInterval interval, + int registersCount) { // If there's a next use after the start of the current interval, // we need to split the spilled interval twice, and re-insert it @@ -522,7 +524,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators splitChild = right; } - InsertInterval(splitChild); + InsertInterval(splitChild, registersCount); } else { @@ -530,13 +532,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - private void InsertInterval(LiveInterval interval) + private void InsertInterval(LiveInterval interval, int registersCount) { Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses."); Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval."); Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval."); - int startIndex = RegistersCount * 2; + int startIndex = registersCount * 2; int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null); @@ -790,12 +792,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return _operationNodes[position / InstructionGap]; } - private void NumberLocals(ControlFlowGraph cfg) + private void NumberLocals(ControlFlowGraph cfg, int registersCount) { _operationNodes = new List<(IntrusiveList<Operation>, Operation)>(); _intervals = new List<LiveInterval>(); - for (int index = 0; index < RegistersCount; index++) + for (int index = 0; index < registersCount; index++) { _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer))); _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector))); @@ -1041,6 +1043,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval interval = _intervals[GetOperandId(dest)]; + if (interval.IsFixed) + { + interval.IsFixedAndUsed = true; + } + interval.SetStart(operationPos + 1); 
interval.AddUsePosition(operationPos + 1); } diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs index 77ad9541..d739ad28 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs @@ -27,6 +27,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public Register Register; public bool IsFixed; + public bool IsFixedAndUsed; } private readonly Data* _data; @@ -44,6 +45,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public ref int SpillOffset => ref _data->SpillOffset; public bool IsFixed => _data->IsFixed; + public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed; public bool IsEmpty => FirstRange == default; public bool IsSplit => Children.Count != 0; public bool IsSpilled => SpillOffset != -1; @@ -114,7 +116,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } else { - FirstRange = new LiveRange(position, position + 1); + FirstRange = new LiveRange(position, position + 1); End = position + 1; } } diff --git a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs index 5b11aac2..bc948f95 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs @@ -11,6 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public int VecCallerSavedRegisters { get; } public int IntCalleeSavedRegisters { get; } public int VecCalleeSavedRegisters { get; } + public int RegistersCount { get; } public RegisterMasks( int intAvailableRegisters, @@ -18,7 +19,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators int intCallerSavedRegisters, int vecCallerSavedRegisters, int intCalleeSavedRegisters, - int vecCalleeSavedRegisters) + int vecCalleeSavedRegisters, + int registersCount) { IntAvailableRegisters = intAvailableRegisters; VecAvailableRegisters = vecAvailableRegisters; @@ -26,6 +28,7 @@ namespace 
ARMeilleure.CodeGen.RegisterAllocators VecCallerSavedRegisters = vecCallerSavedRegisters; IntCalleeSavedRegisters = intCalleeSavedRegisters; VecCalleeSavedRegisters = vecCalleeSavedRegisters; + RegistersCount = registersCount; } public int GetAvailableRegisters(RegisterType type) diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index e589da14..8b5a3fc5 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -16,6 +16,7 @@ namespace ARMeilleure.CodeGen.X86 { static class CodeGenerator { + private const int RegistersCount = 16; private const int PageSize = 0x1000; private const int StackGuardSize = 0x2000; @@ -143,7 +144,8 @@ namespace ARMeilleure.CodeGen.X86 CallingConvention.GetIntCallerSavedRegisters(), CallingConvention.GetVecCallerSavedRegisters(), CallingConvention.GetIntCalleeSavedRegisters(), - CallingConvention.GetVecCalleeSavedRegisters()); + CallingConvention.GetVecCalleeSavedRegisters(), + RegistersCount); AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks); diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index 6407a9a7..8c909ac1 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -5,8 +5,6 @@ namespace ARMeilleure.CodeGen.X86 { static class IntrinsicTable { - private const int BadOp = 0; - private static IntrinsicInfo[] _intrinTable; static IntrinsicTable() |