path: root/ARMeilleure/CodeGen
diff options
Diffstat (limited to 'ARMeilleure/CodeGen')
21 files changed, 5868 insertions, 60 deletions
diff --git a/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs b/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
new file mode 100644
index 00000000..fdd4d024
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
@@ -0,0 +1,270 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+namespace ARMeilleure.CodeGen.Arm64
+ static class Arm64Optimizer
+ {
+ private const int MaxConstantUses = 10000; // Reuse cap for a cached constant copy: RunPass creates a fresh copy once the cached local's UsesCount exceeds this.
+ /// <summary>
+ /// Runs the Arm64-specific pre-pass over every basic block of <paramref name="cfg"/>:
+ /// Add sources that are relocatable constants (or constants rejected by ConstTooLong) are
+ /// replaced by locals holding a copy, and Load/Store addresses are folded into memory
+ /// operands where GetMemoryOperandOrNull finds a pattern. Unused nodes are removed at the end.
+ /// </summary>
+ /// <param name="cfg">Control flow graph that is rewritten in place.</param>
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+     // Copies created so far in the current block. The key carries the operand type and the
+     // relocatable flag in addition to the raw value, so a copy made for one constant is never
+     // handed to a constant of another type (or relocation status) that merely shares its bits.
+     var constants = new Dictionary<(ulong Value, OperandType Type, bool Relocatable), Operand>();
+     Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
+     {
+         var key = (source.Value, source.Type, source.Relocatable);
+         // If the constant has many uses, we also force a new constant mov to be added, in order
+         // to avoid overflow of the counts field (that is limited to 16 bits).
+         if (!constants.TryGetValue(key, out var constant) || constant.UsesCount > MaxConstantUses)
+         {
+             constant = Local(source.Type);
+             Operation copyOp = Operation(Instruction.Copy, constant, source);
+             block.Operations.AddBefore(operation, copyOp);
+             constants[key] = constant;
+         }
+         return constant;
+     }
+     for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+     {
+         // Copies are only shared inside a single block.
+         constants.Clear();
+         Operation nextNode;
+         for (Operation node = block.Operations.First; node != default; node = nextNode)
+         {
+             // Captured up front because copies may be inserted before the current node.
+             nextNode = node.ListNext;
+             // Insert copies for Add constants that are relocatable or that ConstTooLong
+             // rejects as an immediate. Doing this early unblocks a few optimizations.
+             if (node.Instruction == Instruction.Add)
+             {
+                 Operand src1 = node.GetSource(0);
+                 Operand src2 = node.GetSource(1);
+                 if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
+                 {
+                     node.SetSource(0, GetConstantCopy(block, node, src1));
+                 }
+                 if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
+                 {
+                     node.SetSource(1, GetConstantCopy(block, node, src2));
+                 }
+             }
+             // Try to fold something like:
+             // lsl x1, x1, #2
+             // add x0, x0, x1
+             // ldr x0, [x0]
+             // add x2, x2, #16
+             // ldr x2, [x2]
+             // Into:
+             // ldr x0, [x0, x1, lsl #2]
+             // ldr x2, [x2, #16]
+             if (IsMemoryLoadOrStore(node.Instruction))
+             {
+                 // The access type comes from the destination when there is one,
+                 // otherwise from the second source.
+                 OperandType type;
+                 if (node.Destination != default)
+                 {
+                     type = node.Destination.Type;
+                 }
+                 else
+                 {
+                     type = node.GetSource(1).Type;
+                 }
+                 Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
+                 if (memOp != default)
+                 {
+                     node.SetSource(0, memOp);
+                 }
+             }
+         }
+     }
+     Optimizer.RemoveUnusedNodes(cfg);
+ }
+ /// <summary>
+ /// Tries to express <paramref name="addr"/> as a memory operand: either base + immediate
+ /// (when the address is a local plus an encodable constant) or base + index with an optional
+ /// scale (when it is a sum of two locals). Returns default when nothing could be folded.
+ /// </summary>
+ private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
+ {
+     Operand root = addr;
+     // Case 1: address = local + constant. GetConstOp swaps root for the local and
+     // hands back the constant, which becomes the displacement.
+     int displacement = GetConstOp(ref root, type);
+     if (displacement != 0)
+     {
+         return MemoryOp(type, root, default, Multiplier.x1, displacement);
+     }
+     // Case 2: address = local + local, where one side may itself be a left shift by 0 or by
+     // Log2(AccessSize); GetIndexOp then reports the shifted local as index plus the scale.
+     // An address that is directly a shift result is not handled.
+     var (index, multiplier) = GetIndexOp(ref root, type);
+     // An untouched root means neither pattern matched.
+     return root == addr ? default : MemoryOp(type, root, index, multiplier, 0);
+ }
+ /// <summary>
+ /// If <paramref name="baseOp"/> is assigned exactly once by an Add of a local and a constant
+ /// that passes ConstTooLong for <paramref name="accessType"/>, replaces baseOp with the local
+ /// and returns the constant as an int. Returns 0 (baseOp untouched) when the pattern does not match.
+ /// </summary>
+ private static int GetConstOp(ref Operand baseOp, OperandType accessType)
+ {
+     Operation add = GetAsgOpWithInst(baseOp, Instruction.Add);
+     if (add == default)
+     {
+         return 0;
+     }
+     Operand lhs = add.GetSource(0);
+     Operand rhs = add.GetSource(1);
+     bool constFirst = lhs.Kind == OperandKind.Constant && rhs.Kind == OperandKind.LocalVariable;
+     bool constSecond = lhs.Kind == OperandKind.LocalVariable && rhs.Kind == OperandKind.Constant;
+     if (!constFirst && !constSecond)
+     {
+         return 0;
+     }
+     Operand immediate = constFirst ? lhs : rhs;
+     Operand variable = constFirst ? rhs : lhs;
+     // A constant that cannot be encoded on the instruction blocks the fold.
+     if (ConstTooLong(immediate, accessType))
+     {
+         return 0;
+     }
+     baseOp = variable;
+     return immediate.AsInt32();
+ }
+ /// <summary>
+ /// If <paramref name="baseOp"/> is assigned exactly once by an Add of two locals, splits it into
+ /// base and index. When one of the two is a single-assignment left shift of a local by a constant
+ /// 0 or by the scale of <paramref name="accessType"/>, the shifted local becomes the index and the
+ /// shift becomes the returned multiplier. Returns (default, x1) with baseOp untouched otherwise.
+ /// </summary>
+ private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
+ {
+     Operation sum = GetAsgOpWithInst(baseOp, Instruction.Add);
+     if (sum == default)
+     {
+         return (default(Operand), Multiplier.x1);
+     }
+     Operand first = sum.GetSource(0);
+     Operand second = sum.GetSource(1);
+     if (!(first.Kind == OperandKind.LocalVariable && second.Kind == OperandKind.LocalVariable))
+     {
+         return (default(Operand), Multiplier.x1);
+     }
+     // Plain base + index until a usable shift is found.
+     baseOp = first;
+     // The first source is probed for a shift before the second one.
+     Operation shl = GetAsgOpWithInst(first, Instruction.ShiftLeft);
+     bool shiftedIsSecond = shl == default;
+     if (shiftedIsSecond)
+     {
+         shl = GetAsgOpWithInst(second, Instruction.ShiftLeft);
+     }
+     if (shl == default)
+     {
+         return (second, Multiplier.x1);
+     }
+     Operand shifted = shl.GetSource(0);
+     Operand amount = shl.GetSource(1);
+     int fullShift = Assembler.GetScaleForType(accessType);
+     bool foldable = shifted.Kind == OperandKind.LocalVariable &&
+                     amount.Kind == OperandKind.Constant &&
+                     (amount.Value == 0 || amount.Value == (ulong)fullShift);
+     if (!foldable)
+     {
+         return (second, Multiplier.x1);
+     }
+     // The operand that was NOT produced by the shift is the base.
+     baseOp = shiftedIsSecond ? first : second;
+     Multiplier factor = amount.Value switch
+     {
+         1 => Multiplier.x2,
+         2 => Multiplier.x4,
+         3 => Multiplier.x8,
+         4 => Multiplier.x16,
+         _ => Multiplier.x1
+     };
+     return (shifted, factor);
+ }
+ /// <summary>
+ /// Returns the operation that assigns <paramref name="op"/> when there is exactly one assignment
+ /// and it uses <paramref name="inst"/>; returns default otherwise.
+ /// </summary>
+ private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
+ {
+     // With several assignments the value depends on the control flow path taken,
+     // so folding through it would not be safe.
+     if (op.AssignmentsCount != 1)
+     {
+         return default;
+     }
+     Operation only = op.Assignments[0];
+     return only.Instruction == inst ? only : default;
+ }
+ /// <summary>Returns true for Instruction.Load and Instruction.Store, false for everything else.</summary>
+ private static bool IsMemoryLoadOrStore(Instruction inst) => inst switch
+ {
+     Instruction.Load => true,
+     Instruction.Store => true,
+     _ => false
+ };
+ /// <summary>
+ /// Returns true when <paramref name="constOp"/> does not fit in 32 unsigned bits, or when
+ /// CodeGenCommon.ConstFitsOnUImm12 rejects its int value for <paramref name="accessType"/>.
+ /// </summary>
+ private static bool ConstTooLong(Operand constOp, OperandType accessType)
+ {
+     bool fitsInUInt32 = constOp.Value <= uint.MaxValue;
+     // Short-circuit keeps AsInt32 from being called on values wider than 32 bits.
+     return !fitsInUInt32 || !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
+ }
+ }
diff --git a/ARMeilleure/CodeGen/Arm64/ArmCondition.cs b/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
new file mode 100644
index 00000000..db27a810
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+namespace ARMeilleure.CodeGen.Arm64
+ enum ArmCondition // Condition codes; Assembler places the numeric value directly into instruction words (B, Csel, Csinc) and Cset inverts a condition by flipping bit 0.
+ {
+ Eq = 0, // Equal.
+ Ne = 1, // Not equal.
+ GeUn = 2, // Unsigned greater than or equal.
+ LtUn = 3, // Unsigned less than.
+ Mi = 4, // Minus (negative); not produced by ToArmCondition.
+ Pl = 5, // Plus (positive or zero); not produced by ToArmCondition.
+ Vs = 6, // Overflow set; not produced by ToArmCondition.
+ Vc = 7, // Overflow clear; not produced by ToArmCondition.
+ GtUn = 8, // Unsigned greater than.
+ LeUn = 9, // Unsigned less than or equal.
+ Ge = 10, // Signed greater than or equal.
+ Lt = 11, // Signed less than.
+ Gt = 12, // Signed greater than.
+ Le = 13, // Signed less than or equal.
+ Al = 14, // Always; not produced by ToArmCondition.
+ Nv = 15 // Encoding 15; not produced by ToArmCondition.
+ }
+ /// <summary>Conversions from IR comparisons to Arm64 condition codes.</summary>
+ static class ComparisonArm64Extensions
+ {
+     /// <summary>Maps <paramref name="comp"/> to the matching <see cref="ArmCondition"/>.</summary>
+     /// <exception cref="ArgumentException">Thrown when <paramref name="comp"/> has no mapping.</exception>
+     public static ArmCondition ToArmCondition(this Comparison comp)
+     {
+         switch (comp)
+         {
+             case Comparison.Equal: return ArmCondition.Eq;
+             case Comparison.NotEqual: return ArmCondition.Ne;
+             case Comparison.Greater: return ArmCondition.Gt;
+             case Comparison.LessOrEqual: return ArmCondition.Le;
+             case Comparison.GreaterUI: return ArmCondition.GtUn;
+             case Comparison.LessOrEqualUI: return ArmCondition.LeUn;
+             case Comparison.GreaterOrEqual: return ArmCondition.Ge;
+             case Comparison.Less: return ArmCondition.Lt;
+             case Comparison.GreaterOrEqualUI: return ArmCondition.GeUn;
+             case Comparison.LessUI: return ArmCondition.LtUn;
+             default: throw new ArgumentException(null, nameof(comp));
+         }
+     }
+ }
diff --git a/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs b/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
new file mode 100644
index 00000000..062a6d0b
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+ enum ArmExtensionType // Register extension selector; Assembler shifts the value left by 13 into extended-register and register-offset load/store instruction words.
+ {
+ Uxtb = 0, // Unsigned extend byte.
+ Uxth = 1, // Unsigned extend halfword.
+ Uxtw = 2, // Unsigned extend word.
+ Uxtx = 3, // Unsigned extend doubleword; the value Assembler.Ldr and Assembler.Str pass for register-offset accesses.
+ Sxtb = 4, // Signed extend byte.
+ Sxth = 5, // Signed extend halfword.
+ Sxtw = 6, // Signed extend word.
+ Sxtx = 7 // Signed extend doubleword.
+ }
diff --git a/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs b/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
new file mode 100644
index 00000000..d223a146
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.CodeGen.Arm64
+ enum ArmShiftType // Shift kind; Assembler shifts the value left by 22 into shifted-register instruction words.
+ {
+ Lsl = 0, // Logical shift left.
+ Lsr = 1, // Logical shift right.
+ Asr = 2, // Arithmetic shift right.
+ Ror = 3 // Rotate right.
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/Assembler.cs b/ARMeilleure/CodeGen/Arm64/Assembler.cs
new file mode 100644
index 00000000..0ec0be7c
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/Assembler.cs
@@ -0,0 +1,1160 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+using static ARMeilleure.IntermediateRepresentation.Operand;
+namespace ARMeilleure.CodeGen.Arm64
+ class Assembler
+ {
+ public const uint SfFlag = 1u << 31; // Bit 31 of an instruction word; OR'ed in by the *Auto writers when the destination type is I64.
+ private const int SpRegister = 31; // Register index that MovSp compares against to detect the stack pointer.
+ private const int ZrRegister = 31; // Register index used to build the zero-register operand (Cmp, Cset, Mov, Mul, Mvn, Neg, Tst).
+ private readonly Stream _stream; // Assigned once by the constructor.
+ /// <summary>Creates an assembler bound to <paramref name="stream"/> (kept in _stream).</summary>
+ public Assembler(Stream stream) => _stream = stream;
+ /// <summary>Emits ADD (extended register): opcode 0x0b200000 with <paramref name="extensionType"/> and an optional shift amount.</summary>
+ public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+     => WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount);
+ /// <summary>
+ /// Emits ADD with either the immediate opcode (0x11000000) or the shifted-register opcode (0x0b000000);
+ /// the form is chosen by WriteInstructionAuto from rm, and <paramref name="immForm"/> forces the
+ /// immediate form even for a zero constant.
+ /// </summary>
+ public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false)
+     => WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm);
+ /// <summary>Emits AND: bitmask-immediate opcode 0x12000000 or shifted-register opcode 0x0a000000, chosen from rm.</summary>
+ public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+     => WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount);
+ /// <summary>Emits ANDS: bitmask-immediate opcode 0x72000000 or shifted-register opcode 0x6a000000, chosen from rm.</summary>
+ public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+     => WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount);
+ /// <summary>
+ /// Arithmetic shift right. A constant rm is masked to the register width (63 for I64, 31 otherwise)
+ /// and emitted through Sbfm; any other rm goes through Asrv.
+ /// </summary>
+ public void Asr(Operand rd, Operand rn, Operand rm)
+ {
+     if (rm.Kind != OperandKind.Constant)
+     {
+         Asrv(rd, rn, rm);
+         return;
+     }
+     int top = rd.Type == OperandType.I64 ? 63 : 31;
+     int amount = rm.AsInt32() & top;
+     Sbfm(rd, rn, amount, top);
+ }
+ /// <summary>Emits ASRV (shift amount in a register): opcode 0x1ac02800.</summary>
+ public void Asrv(Operand rd, Operand rn, Operand rm)
+     => WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm);
+ /// <summary>Emits an unconditional B: opcode 0x14000000 combined with <paramref name="imm"/> encoded by EncodeSImm26_2.</summary>
+ public void B(int imm) => WriteUInt32(0x14000000u | EncodeSImm26_2(imm));
+ /// <summary>Emits a conditional B: opcode 0x54000000, condition in the low bits, offset encoded by EncodeSImm19_2 at bit 5.</summary>
+ public void B(ArmCondition condition, int imm)
+ {
+     uint offsetField = EncodeSImm19_2(imm) << 5;
+     WriteUInt32(0x54000000u | (uint)condition | offsetField);
+ }
+ /// <summary>Emits BLR: opcode 0xd63f0000 with <paramref name="rn"/> encoded at bit 5.</summary>
+ public void Blr(Operand rn) => WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5));
+ /// <summary>Emits BR: opcode 0xd61f0000 with <paramref name="rn"/> encoded at bit 5.</summary>
+ public void Br(Operand rn) => WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5));
+ /// <summary>Emits BRK as the fixed instruction word 0xd4200000.</summary>
+ public void Brk() => WriteUInt32(0xd4200000u);
+ /// <summary>Emits CBZ: opcode 0x34000000 with the offset encoded by EncodeSImm19_2 at bit 5; width follows rt's type.</summary>
+ public void Cbz(Operand rt, int imm)
+     => WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt);
+ /// <summary>Emits CBNZ: opcode 0x35000000 with the offset encoded by EncodeSImm19_2 at bit 5; width follows rt's type.</summary>
+ public void Cbnz(Operand rt, int imm)
+     => WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt);
+ /// <summary>Emits CLREX: opcode 0xd503305f with <paramref name="crm"/> (default 15) encoded by EncodeUImm4 at bit 8.</summary>
+ public void Clrex(int crm = 15) => WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8));
+ /// <summary>Emits CLZ: opcode 0x5ac01000; width follows rd's type.</summary>
+ public void Clz(Operand rd, Operand rn) => WriteInstructionAuto(0x5ac01000u, rd, rn);
+ /// <summary>Emits the vector CMEQ (register) form: opcode 0x2e208c00 with <paramref name="size"/> (0-3) at bit 22.</summary>
+ public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true)
+ {
+     Debug.Assert((uint)size < 4);
+     uint opcode = 0x2e208c00u | ((uint)size << 22);
+     WriteSimdInstruction(opcode, rd, rn, rm, q);
+ }
+ /// <summary>Compare: emitted as Subs whose destination is the zero register of rn's type.</summary>
+ public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+     Operand discard = Factory.Register(ZrRegister, RegisterType.Integer, rn.Type);
+     Subs(discard, rn, rm, shiftType, shiftAmount);
+ }
+ /// <summary>Emits CSEL: opcode 0x1a800000 with <paramref name="condition"/> at bit 12.</summary>
+ public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+     => WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm);
+ /// <summary>Conditional set: emitted as Csinc of the zero register with itself under the inverted condition (bit 0 flipped).</summary>
+ public void Cset(Operand rd, ArmCondition condition)
+ {
+     Operand zero = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+     ArmCondition inverted = (ArmCondition)((int)condition ^ 1);
+     Csinc(rd, zero, zero, inverted);
+ }
+ /// <summary>Emits CSINC: opcode 0x1a800400 with <paramref name="condition"/> at bit 12.</summary>
+ public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+     => WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm);
+ /// <summary>Emits DMB: opcode 0xd50330bf with <paramref name="option"/> shifted to bit 8 (the value is not masked here).</summary>
+ public void Dmb(uint option) => WriteUInt32(0xd50330bfu | (option << 8));
+ /// <summary>Emits the scalar DUP (element) form: opcode 0x5e000400 with the index/size field from EncodeIndexSizeImm5 at bit 16.</summary>
+ public void DupScalar(Operand rd, Operand rn, int index, int size)
+ {
+     uint laneField = EncodeIndexSizeImm5(index, size) << 16;
+     WriteInstruction(0x5e000400u | laneField, rd, rn);
+ }
+ /// <summary>Emits EOR: bitmask-immediate opcode 0x52000000 or shifted-register opcode 0x4a000000, chosen from rm.</summary>
+ public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+     => WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount);
+ /// <summary>Emits the vector EOR form: opcode 0x2e201c00.</summary>
+ public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true)
+     => WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q);
+ /// <summary>Emits EXTR: opcode 0x13800000 with <paramref name="imms"/> (via EncodeUImm6) at bit 10 and bit 22 set for I64 destinations.</summary>
+ public void Extr(Operand rd, Operand rn, Operand rm, int imms)
+ {
+     uint nBit = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+     uint opcode = 0x13800000u | nBit | (EncodeUImm6(imms) << 10);
+     WriteInstructionBitwiseAuto(opcode, rd, rn, rm);
+ }
+ /// <summary>Emits scalar FADD: opcode 0x1e202800; precision follows rd's type.</summary>
+ public void FaddScalar(Operand rd, Operand rn, Operand rm)
+     => WriteFPInstructionAuto(0x1e202800u, rd, rn, rm);
+ /// <summary>Emits scalar FCVT: opcode 0x1e224000 with bit 15 set when rd is FP64 and bit 22 set otherwise.</summary>
+ public void FcvtScalar(Operand rd, Operand rn)
+ {
+     uint direction = rd.Type == OperandType.FP64 ? 1u << 15 : 1u << 22;
+     uint opcode = 0x1e224000u | direction;
+     WriteUInt32(opcode | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+ /// <summary>Emits scalar FDIV: opcode 0x1e201800; precision follows rd's type.</summary>
+ public void FdivScalar(Operand rd, Operand rn, Operand rm)
+     => WriteFPInstructionAuto(0x1e201800u, rd, rn, rm);
+ /// <summary>Emits the FMOV register form: opcode 0x1e204000; precision follows rd's type.</summary>
+ public void Fmov(Operand rd, Operand rn) => WriteFPInstructionAuto(0x1e204000u, rd, rn);
+ /// <summary>
+ /// Emits FMOV between an integer and a non-integer register (exactly one operand must be integer).
+ /// <paramref name="topHalf"/> sets bit 19 and is only allowed when one operand is I64.
+ /// </summary>
+ public void Fmov(Operand rd, Operand rn, bool topHalf)
+ {
+     Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger());
+     Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf);
+     bool toInteger = rd.Type.IsInteger();
+     bool anyFp64 = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64;
+     bool anyI64 = rd.Type == OperandType.I64 || rn.Type == OperandType.I64;
+     uint word = 0x1e260000u | ((toInteger ? 0b110u : 0b111u) << 16);
+     if (topHalf)
+     {
+         word |= 1u << 19;
+     }
+     if (anyFp64)
+     {
+         word |= 1u << 22;
+     }
+     if (anyI64)
+     {
+         word |= SfFlag;
+     }
+     WriteUInt32(word | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+ /// <summary>Emits scalar FMUL: opcode 0x1e200800; precision follows rd's type.</summary>
+ public void FmulScalar(Operand rd, Operand rn, Operand rm)
+     => WriteFPInstructionAuto(0x1e200800u, rd, rn, rm);
+ /// <summary>Emits scalar FNEG: opcode 0x1e214000; precision follows rd's type.</summary>
+ public void FnegScalar(Operand rd, Operand rn) => WriteFPInstructionAuto(0x1e214000u, rd, rn);
+ /// <summary>Emits scalar FSUB: opcode 0x1e203800; precision follows rd's type.</summary>
+ public void FsubScalar(Operand rd, Operand rn, Operand rm)
+     => WriteFPInstructionAuto(0x1e203800u, rd, rn, rm);
+ /// <summary>Emits INS with opcode 0x4e001c00; the destination lane comes from EncodeIndexSizeImm5 at bit 16.</summary>
+ public void Ins(Operand rd, Operand rn, int index, int size)
+ {
+     uint laneField = EncodeIndexSizeImm5(index, size) << 16;
+     WriteInstruction(0x4e001c00u | laneField, rd, rn);
+ }
+ /// <summary>
+ /// Emits INS with opcode 0x6e000400 (element to element): the source lane is srcIndex shifted left by
+ /// size at bit 11, the destination lane comes from EncodeIndexSizeImm5 at bit 16.
+ /// </summary>
+ public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size)
+ {
+     Debug.Assert((uint)srcIndex < (16u >> size));
+     uint sourceLane = (uint)srcIndex << size;
+     uint destLane = EncodeIndexSizeImm5(dstIndex, size);
+     WriteInstruction(0x6e000400u | (sourceLane << 11) | (destLane << 16), rd, rn);
+ }
+ /// <summary>Emits LDAXP: opcode 0x887f8000 with the size field (3 for I64, 2 otherwise) at bit 30; rt2 lands at bit 10.</summary>
+ public void Ldaxp(Operand rt, Operand rt2, Operand rn)
+ {
+     uint size = rt.Type == OperandType.I64 ? 3u : 2u;
+     WriteInstruction(0x887f8000u | (size << 30), rt, rn, rt2);
+ }
+ /// <summary>Emits LDAXR: opcode 0x085ffc00 with the size field (3 for I64, 2 otherwise) at bit 30.</summary>
+ public void Ldaxr(Operand rt, Operand rn)
+ {
+     uint size = rt.Type == OperandType.I64 ? 3u : 2u;
+     WriteInstruction(0x085ffc00u | (size << 30), rt, rn);
+ }
+ /// <summary>Emits LDAXRB: opcode 0x085ffc00 with size field 0.</summary>
+ public void Ldaxrb(Operand rt, Operand rn) => WriteInstruction(0x085ffc00u, rt, rn);
+ /// <summary>Emits LDAXRH: opcode 0x085ffc00 with size field 1 at bit 30.</summary>
+ public void Ldaxrh(Operand rt, Operand rn) => WriteInstruction(0x085ffc00u | (1u << 30), rt, rn);
+ /// <summary>Emits LDP, post-indexed: base opcodes 0x28c00000 / 0x2cc00000 resolved by GetLdpStpInstruction from imm and rt's type.</summary>
+ public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+     => WriteInstruction(GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type), rt, rn, rt2);
+ /// <summary>Emits LDP, pre-indexed: base opcodes 0x29c00000 / 0x2dc00000 resolved by GetLdpStpInstruction from imm and rt's type.</summary>
+ public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+     => WriteInstruction(GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type), rt, rn, rt2);
+ /// <summary>Emits LDP, signed-offset form: base opcodes 0x29400000 / 0x2d400000 resolved by GetLdpStpInstruction from imm and rt's type.</summary>
+ public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+     => WriteInstruction(GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type), rt, rn, rt2);
+ /// <summary>
+ /// Loads rt from <paramref name="rn"/>. A plain register is used as base with offset 0; a memory
+ /// operand is emitted as base + displacement, or as base + index (optionally scaled) when it has an index.
+ /// </summary>
+ public void Ldr(Operand rt, Operand rn)
+ {
+     if (rn.Kind != OperandKind.Memory)
+     {
+         LdrRiUn(rt, rn, 0);
+         return;
+     }
+     MemoryOperand mem = rn.GetMemory();
+     if (mem.Index == default)
+     {
+         LdrRiUn(rt, mem.BaseAddress, mem.Displacement);
+         return;
+     }
+     // Indexed form: no displacement, and the scale is either x1 or the access size.
+     Debug.Assert(mem.Displacement == 0);
+     Debug.Assert(mem.Scale == Multiplier.x1 || (int)mem.Scale == GetScaleForType(rt.Type));
+     bool scaled = mem.Scale != Multiplier.x1;
+     LdrRr(rt, mem.BaseAddress, mem.Index, ArmExtensionType.Uxtx, scaled);
+ }
+ /// <summary>Emits LDR (literal): opcode 0x18000000 with the offset encoded by EncodeSImm19_2 at bit 5; bit 30 is set for I64.</summary>
+ public void LdrLit(Operand rt, int offset)
+ {
+     uint word = 0x18000000u | (EncodeSImm19_2(offset) << 5);
+     uint wide = rt.Type == OperandType.I64 ? 1u << 30 : 0u;
+     WriteInstruction(word | wide, rt);
+ }
+ /// <summary>Emits LDR, post-indexed: base opcodes 0xb8400400 / 0x3c400400 resolved by GetLdrStrInstruction, imm via EncodeSImm9 at bit 12.</summary>
+ public void LdrRiPost(Operand rt, Operand rn, int imm)
+ {
+     uint opcode = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type);
+     WriteInstruction(opcode | (EncodeSImm9(imm) << 12), rt, rn);
+ }
+ /// <summary>Emits LDR, pre-indexed: base opcodes 0xb8400c00 / 0x3c400c00 resolved by GetLdrStrInstruction, imm via EncodeSImm9 at bit 12.</summary>
+ public void LdrRiPre(Operand rt, Operand rn, int imm)
+ {
+     uint opcode = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type);
+     WriteInstruction(opcode | (EncodeSImm9(imm) << 12), rt, rn);
+ }
+ /// <summary>Emits LDR, unsigned-offset form: base opcodes 0xb9400000 / 0x3d400000 resolved by GetLdrStrInstruction, imm via EncodeUImm12 (typed by rt) at bit 10.</summary>
+ public void LdrRiUn(Operand rt, Operand rn, int imm)
+ {
+     uint opcode = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type);
+     WriteInstruction(opcode | (EncodeUImm12(imm, rt.Type) << 10), rt, rn);
+ }
+ /// <summary>Emits LDR, register-offset form: base opcodes 0xb8600800 / 0x3ce00800 resolved by GetLdrStrInstruction; extension and shift are applied by WriteInstructionLdrStrAuto.</summary>
+ public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+     => WriteInstructionLdrStrAuto(GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type), rt, rn, rm, extensionType, shift);
+ /// <summary>Emits LDRB, post-indexed: opcode 0x38400400 with imm via EncodeSImm9 at bit 12.</summary>
+ public void LdrbRiPost(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x38400400u | (EncodeSImm9(imm) << 12), rt, rn);
+ /// <summary>Emits LDRB, pre-indexed: opcode 0x38400c00 with imm via EncodeSImm9 at bit 12.</summary>
+ public void LdrbRiPre(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x38400c00u | (EncodeSImm9(imm) << 12), rt, rn);
+ /// <summary>Emits LDRB, unsigned-offset form: opcode 0x39400000 with imm via EncodeUImm12 (second argument 0) at bit 10.</summary>
+ public void LdrbRiUn(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x39400000u | (EncodeUImm12(imm, 0) << 10), rt, rn);
+ /// <summary>Emits LDRH, post-indexed: opcode 0x78400400 with imm via EncodeSImm9 at bit 12.</summary>
+ public void LdrhRiPost(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x78400400u | (EncodeSImm9(imm) << 12), rt, rn);
+ /// <summary>Emits LDRH, pre-indexed: opcode 0x78400c00 with imm via EncodeSImm9 at bit 12.</summary>
+ public void LdrhRiPre(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x78400c00u | (EncodeSImm9(imm) << 12), rt, rn);
+ /// <summary>Emits LDRH, unsigned-offset form: opcode 0x79400000 with imm via EncodeUImm12 (second argument 1) at bit 10.</summary>
+ public void LdrhRiUn(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x79400000u | (EncodeUImm12(imm, 1) << 10), rt, rn);
+ /// <summary>Emits LDUR: base opcodes 0xb8400000 / 0x3c400000 resolved by GetLdrStrInstruction, imm via EncodeSImm9 at bit 12.</summary>
+ public void Ldur(Operand rt, Operand rn, int imm)
+ {
+     uint opcode = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type);
+     WriteInstruction(opcode | (EncodeSImm9(imm) << 12), rt, rn);
+ }
+ /// <summary>
+ /// Logical shift left. A constant rm is masked to the register width (63 for I64, 31 otherwise)
+ /// and emitted through Ubfm with immr = (-shift &amp; mask) and imms = mask - shift; any other rm goes through Lslv.
+ /// </summary>
+ public void Lsl(Operand rd, Operand rn, Operand rm)
+ {
+     if (rm.Kind != OperandKind.Constant)
+     {
+         Lslv(rd, rn, rm);
+         return;
+     }
+     int top = rd.Type == OperandType.I64 ? 63 : 31;
+     int amount = rm.AsInt32() & top;
+     Ubfm(rd, rn, -amount & top, top - amount);
+ }
+ /// <summary>Emits LSLV (shift amount in a register): opcode 0x1ac02000.</summary>
+ public void Lslv(Operand rd, Operand rn, Operand rm)
+     => WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm);
+ /// <summary>
+ /// Logical shift right. A constant rm is masked to the register width (63 for I64, 31 otherwise)
+ /// and emitted through Ubfm; any other rm goes through Lsrv.
+ /// </summary>
+ public void Lsr(Operand rd, Operand rn, Operand rm)
+ {
+     if (rm.Kind != OperandKind.Constant)
+     {
+         Lsrv(rd, rn, rm);
+         return;
+     }
+     int top = rd.Type == OperandType.I64 ? 63 : 31;
+     int amount = rm.AsInt32() & top;
+     Ubfm(rd, rn, amount, top);
+ }
+ /// <summary>Emits LSRV (shift amount in a register): opcode 0x1ac02400.</summary>
+ public void Lsrv(Operand rd, Operand rn, Operand rm)
+     => WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm);
+ /// <summary>Emits MADD: opcode 0x1b000000 with rm at bit 16 and ra at bit 10; width follows rd's type.</summary>
+ public void Madd(Operand rd, Operand rn, Operand rm, Operand ra)
+     => WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra);
+ /// <summary>Multiply: emitted as Madd with the zero register of rd's type as the addend.</summary>
+ public void Mul(Operand rd, Operand rn, Operand rm)
+ {
+     Operand zeroAddend = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+     Madd(rd, rn, rm, zeroAddend);
+ }
+ /// <summary>
+ /// Register move. Integer destinations use Orr with the zero register as first source;
+ /// all other destinations use OrrVector with rn as both sources.
+ /// </summary>
+ public void Mov(Operand rd, Operand rn)
+ {
+     if (!rd.Type.IsInteger())
+     {
+         OrrVector(rd, rn, rn);
+         return;
+     }
+     Operand zero = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+     Orr(rd, zero, rn);
+ }
+ /// <summary>
+ /// Move that tolerates the stack pointer: when either register has index SpRegister the move is
+ /// emitted as an immediate-form Add of 0, otherwise it falls back to Mov.
+ /// </summary>
+ public void MovSp(Operand rd, Operand rn)
+ {
+     bool involvesSp = rd.GetRegister().Index == SpRegister ||
+                       rn.GetRegister().Index == SpRegister;
+     if (!involvesSp)
+     {
+         Mov(rd, rn);
+         return;
+     }
+     Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true);
+ }
+ /// <summary>Moves an immediate into rd via Movz with hw = 0.</summary>
+ public void Mov(Operand rd, int imm) => Movz(rd, imm, 0);
+ /// <summary>Emits MOVZ: opcode 0x52800000 with imm via EncodeUImm16 at bit 5 and <paramref name="hw"/> at bit 21 (0-3 for I64, 0-1 otherwise).</summary>
+ public void Movz(Operand rd, int imm, int hw)
+ {
+     Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+     uint opcode = 0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21);
+     WriteInstructionAuto(opcode, rd);
+ }
+ /// <summary>Emits MOVK: opcode 0x72800000 with imm via EncodeUImm16 at bit 5 and <paramref name="hw"/> at bit 21 (0-3 for I64, 0-1 otherwise).</summary>
+ public void Movk(Operand rd, int imm, int hw)
+ {
+     Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+     uint opcode = 0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21);
+     WriteInstructionAuto(opcode, rd);
+ }
+ /// <summary>
+ /// Emits MRS: opcode 0xd5300000 with the system register selected by o0 (bit 19), op1 (bit 16),
+ /// crn (bit 12), crm (bit 8) and op2 (bit 5); each field is masked to its width.
+ /// </summary>
+ public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2)
+ {
+     uint word = 0xd5300000u
+         | ((op2 & 7) << 5)
+         | ((crm & 15) << 8)
+         | ((crn & 15) << 12)
+         | ((op1 & 7) << 16)
+         | ((o0 & 1) << 19);
+     WriteInstruction(word, rt);
+ }
+ /// <summary>Bitwise NOT: emitted as Orn with the zero register of rd's type as first source.</summary>
+ public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+     Operand zero = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+     Orn(rd, zero, rn, shiftType, shiftAmount);
+ }
+ /// <summary>Negate: emitted as Sub with the zero register of rd's type as first source.</summary>
+ public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+     Operand zero = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+     Sub(rd, zero, rn, shiftType, shiftAmount);
+ }
+ /// <summary>Emits ORN (shifted register): opcode 0x2a200000.</summary>
+ public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+     => WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount);
+ /// <summary>Emits ORR: bitmask-immediate opcode 0x32000000 or shifted-register opcode 0x2a000000, chosen from rm.</summary>
+ public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+     => WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount);
+ /// <summary>Emits the vector ORR (register) form: opcode 0x0ea01c00.</summary>
+ public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true)
+     => WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q);
+ /// <summary>Emits RET: opcode 0xd65f0000 with <paramref name="rn"/> encoded at bit 5.</summary>
+ public void Ret(Operand rn) => WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5));
+ /// <summary>Emits REV: opcode 0x5ac00800, with bit 10 additionally set for I64 destinations.</summary>
+ public void Rev(Operand rd, Operand rn)
+ {
+     uint opcode = 0x5ac00800u;
+     if (rd.Type == OperandType.I64)
+     {
+         opcode |= 1u << 10;
+     }
+     WriteInstructionAuto(opcode, rd, rn);
+ }
+ /// <summary>
+ /// Rotate right. A constant rm is masked to the register width (63 for I64, 31 otherwise)
+ /// and emitted through Extr with rn as both sources; any other rm goes through Rorv.
+ /// </summary>
+ public void Ror(Operand rd, Operand rn, Operand rm)
+ {
+     if (rm.Kind != OperandKind.Constant)
+     {
+         Rorv(rd, rn, rm);
+         return;
+     }
+     int top = rd.Type == OperandType.I64 ? 63 : 31;
+     int amount = rm.AsInt32() & top;
+     Extr(rd, rn, rn, amount);
+ }
+ /// <summary>Emits RORV (rotate amount in a register): opcode 0x1ac02c00.</summary>
+ public void Rorv(Operand rd, Operand rn, Operand rm)
+     => WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm);
+ /// <summary>Emits SBFM: opcode 0x13000000 with imms at bit 10, immr at bit 16 (both via EncodeUImm6) and bit 22 set for I64 destinations.</summary>
+ public void Sbfm(Operand rd, Operand rn, int immr, int imms)
+ {
+     uint nBit = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+     uint opcode = 0x13000000u | nBit | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16);
+     WriteInstructionAuto(opcode, rd, rn);
+ }
+ /// <summary>Emits scalar SCVTF: opcode 0x1e220000, with SfFlag added when the source is I64; precision follows rd's type.</summary>
+ public void ScvtfScalar(Operand rd, Operand rn)
+ {
+     uint sf = rn.Type == OperandType.I64 ? SfFlag : 0u;
+     WriteFPInstructionAuto(0x1e220000u | sf, rd, rn);
+ }
+ /// <summary>Emits SDIV: opcode 0x1ac00c00 through WriteInstructionRm16Auto.</summary>
+ public void Sdiv(Operand rd, Operand rn, Operand rm)
+     => WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm);
+ /// <summary>Emits SMULH as the fixed opcode 0x9b407c00 (no size flag is added).</summary>
+ public void Smulh(Operand rd, Operand rn, Operand rm)
+     => WriteInstructionRm16(0x9b407c00u, rd, rn, rm);
+ /// <summary>Emits STLXP: opcode 0x88208000 with the size field (3 for I64, 2 otherwise) at bit 30; rs lands at bit 16 and rt2 at bit 10.</summary>
+ public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs)
+ {
+     uint size = rt.Type == OperandType.I64 ? 3u : 2u;
+     WriteInstruction(0x88208000u | (size << 30), rt, rn, rs, rt2);
+ }
+ /// <summary>Emits STLXR: opcode 0x0800fc00 with the size field (3 for I64, 2 otherwise) at bit 30.</summary>
+ public void Stlxr(Operand rt, Operand rn, Operand rs)
+ {
+     uint size = rt.Type == OperandType.I64 ? 3u : 2u;
+     WriteInstructionRm16(0x0800fc00u | (size << 30), rt, rn, rs);
+ }
+ /// <summary>Emits STLXRB: opcode 0x0800fc00 with size field 0.</summary>
+ public void Stlxrb(Operand rt, Operand rn, Operand rs)
+     => WriteInstructionRm16(0x0800fc00u, rt, rn, rs);
+ /// <summary>Emits STLXRH: opcode 0x0800fc00 with size field 1 at bit 30.</summary>
+ public void Stlxrh(Operand rt, Operand rn, Operand rs)
+     => WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs);
+ /// <summary>Emits STP, post-indexed: base opcodes 0x28800000 / 0x2c800000 resolved by GetLdpStpInstruction from imm and rt's type.</summary>
+ public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+     => WriteInstruction(GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type), rt, rn, rt2);
+ /// <summary>Emits STP, pre-indexed: base opcodes 0x29800000 / 0x2d800000 resolved by GetLdpStpInstruction from imm and rt's type.</summary>
+ public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+     => WriteInstruction(GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type), rt, rn, rt2);
+ /// <summary>Emits STP, signed-offset form: base opcodes 0x29000000 / 0x2d000000 resolved by GetLdpStpInstruction from imm and rt's type.</summary>
+ public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+     => WriteInstruction(GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type), rt, rn, rt2);
+ /// <summary>
+ /// Stores rt to <paramref name="rn"/>. A plain register is used as base with offset 0; a memory
+ /// operand is emitted as base + displacement, or as base + index (optionally scaled) when it has an index.
+ /// </summary>
+ public void Str(Operand rt, Operand rn)
+ {
+     if (rn.Kind != OperandKind.Memory)
+     {
+         StrRiUn(rt, rn, 0);
+         return;
+     }
+     MemoryOperand mem = rn.GetMemory();
+     if (mem.Index == default)
+     {
+         StrRiUn(rt, mem.BaseAddress, mem.Displacement);
+         return;
+     }
+     // Indexed form: no displacement, and the scale is either x1 or the access size.
+     Debug.Assert(mem.Displacement == 0);
+     Debug.Assert(mem.Scale == Multiplier.x1 || (int)mem.Scale == GetScaleForType(rt.Type));
+     bool scaled = mem.Scale != Multiplier.x1;
+     StrRr(rt, mem.BaseAddress, mem.Index, ArmExtensionType.Uxtx, scaled);
+ }
+ /// <summary>Emits STR, post-indexed: base opcodes 0xb8000400 / 0x3c000400 resolved by GetLdrStrInstruction, imm via EncodeSImm9 at bit 12.</summary>
+ public void StrRiPost(Operand rt, Operand rn, int imm)
+ {
+     uint opcode = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type);
+     WriteInstruction(opcode | (EncodeSImm9(imm) << 12), rt, rn);
+ }
+ /// <summary>Emits STR, pre-indexed: base opcodes 0xb8000c00 / 0x3c000c00 resolved by GetLdrStrInstruction, imm via EncodeSImm9 at bit 12.</summary>
+ public void StrRiPre(Operand rt, Operand rn, int imm)
+ {
+     uint opcode = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type);
+     WriteInstruction(opcode | (EncodeSImm9(imm) << 12), rt, rn);
+ }
+ /// <summary>Emits STR, unsigned-offset form: base opcodes 0xb9000000 / 0x3d000000 resolved by GetLdrStrInstruction, imm via EncodeUImm12 (typed by rt) at bit 10.</summary>
+ public void StrRiUn(Operand rt, Operand rn, int imm)
+ {
+     uint opcode = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type);
+     WriteInstruction(opcode | (EncodeUImm12(imm, rt.Type) << 10), rt, rn);
+ }
+ /// <summary>Emits STR, register-offset form: base opcodes 0xb8200800 / 0x3ca00800 resolved by GetLdrStrInstruction; extension and shift are applied by WriteInstructionLdrStrAuto.</summary>
+ public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+     => WriteInstructionLdrStrAuto(GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type), rt, rn, rm, extensionType, shift);
+ /// <summary>Emits STRB, post-indexed: opcode 0x38000400 with imm via EncodeSImm9 at bit 12.</summary>
+ public void StrbRiPost(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x38000400u | (EncodeSImm9(imm) << 12), rt, rn);
+ /// <summary>Emits STRB, pre-indexed: opcode 0x38000c00 with imm via EncodeSImm9 at bit 12.</summary>
+ public void StrbRiPre(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x38000c00u | (EncodeSImm9(imm) << 12), rt, rn);
+ /// <summary>Emits STRB, unsigned-offset form: opcode 0x39000000 with imm via EncodeUImm12 (second argument 0) at bit 10.</summary>
+ public void StrbRiUn(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x39000000u | (EncodeUImm12(imm, 0) << 10), rt, rn);
+ /// <summary>Emits STRH, post-indexed: opcode 0x78000400 with imm via EncodeSImm9 at bit 12.</summary>
+ public void StrhRiPost(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x78000400u | (EncodeSImm9(imm) << 12), rt, rn);
+ /// <summary>Emits STRH, pre-indexed: opcode 0x78000c00 with imm via EncodeSImm9 at bit 12.</summary>
+ public void StrhRiPre(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x78000c00u | (EncodeSImm9(imm) << 12), rt, rn);
+ /// <summary>Emits STRH, unsigned-offset form: opcode 0x79000000 with imm via EncodeUImm12 (second argument 1) at bit 10.</summary>
+ public void StrhRiUn(Operand rt, Operand rn, int imm)
+     => WriteInstruction(0x79000000u | (EncodeUImm12(imm, 1) << 10), rt, rn);
+ /// <summary>Emits STUR: base opcodes 0xb8000000 / 0x3c000000 resolved by GetLdrStrInstruction, imm via EncodeSImm9 at bit 12.</summary>
+ public void Stur(Operand rt, Operand rn, int imm)
+ {
+     uint opcode = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type);
+     WriteInstruction(opcode | (EncodeSImm9(imm) << 12), rt, rn);
+ }
+ /// <summary>Emits SUB (extended register): opcode 0x4b200000 with <paramref name="extensionType"/> and an optional shift amount.</summary>
+ public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+     => WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount);
+ /// <summary>Emits SUB: immediate opcode 0x51000000 or shifted-register opcode 0x4b000000, chosen by WriteInstructionAuto from rm.</summary>
+ public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+     => WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount);
+ /// <summary>Emits SUBS: immediate opcode 0x71000000 or shifted-register opcode 0x6b000000, chosen by WriteInstructionAuto from rm.</summary>
+ public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+     => WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount);
+ /// <summary>Sign-extends the low byte: Sbfm with immr = 0, imms = 7.</summary>
+ public void Sxtb(Operand rd, Operand rn) => Sbfm(rd, rn, 0, 7);
+ /// <summary>Sign-extends the low halfword: Sbfm with immr = 0, imms = 15.</summary>
+ public void Sxth(Operand rd, Operand rn) => Sbfm(rd, rn, 0, 15);
+ /// <summary>Sign-extends the low word: Sbfm with immr = 0, imms = 31.</summary>
+ public void Sxtw(Operand rd, Operand rn) => Sbfm(rd, rn, 0, 31);
+ /// <summary>Bit test: emitted as Ands whose destination is the zero register of rn's type.</summary>
+ public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+     Operand discard = Factory.Register(ZrRegister, RegisterType.Integer, rn.Type);
+     Ands(discard, rn, rm, shiftType, shiftAmount);
+ }
+ /// <summary>Emits UBFM: opcode 0x53000000 with imms at bit 10, immr at bit 16 (both via EncodeUImm6) and bit 22 set for I64 destinations.</summary>
+ public void Ubfm(Operand rd, Operand rn, int immr, int imms)
+ {
+     uint nBit = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+     uint opcode = 0x53000000u | nBit | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16);
+     WriteInstructionAuto(opcode, rd, rn);
+ }
+ /// <summary>Emits scalar UCVTF: opcode 0x1e230000, with SfFlag added when the source is I64; precision follows rd's type.</summary>
+ public void UcvtfScalar(Operand rd, Operand rn)
+ {
+     uint sf = rn.Type == OperandType.I64 ? SfFlag : 0u;
+     WriteFPInstructionAuto(0x1e230000u | sf, rd, rn);
+ }
+ /// <summary>Emits UDIV: opcode 0x1ac00800 through WriteInstructionRm16Auto.</summary>
+ public void Udiv(Operand rd, Operand rn, Operand rm)
+     => WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm);
+ /// <summary>Emits UMOV: opcode 0x0e003c00 with the lane field from EncodeIndexSizeImm5 at bit 16; bit 30 is set when size is 3.</summary>
+ public void Umov(Operand rd, Operand rn, int index, int size)
+ {
+     uint opcode = 0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16);
+     if (size == 3)
+     {
+         opcode |= 1u << 30;
+     }
+     WriteInstruction(opcode, rd, rn);
+ }
+ /// <summary>Emits UMULH as the fixed opcode 0x9bc07c00 (no size flag is added).</summary>
+ public void Umulh(Operand rd, Operand rn, Operand rm)
+     => WriteInstructionRm16(0x9bc07c00u, rd, rn, rm);
+ /// <summary>Zero-extends the low byte: Ubfm with immr = 0, imms = 7.</summary>
+ public void Uxtb(Operand rd, Operand rn) => Ubfm(rd, rn, 0, 7);
+ /// <summary>Zero-extends the low halfword: Ubfm with immr = 0, imms = 15.</summary>
+ public void Uxth(Operand rd, Operand rn) => Ubfm(rd, rn, 0, 15);
+ /// <summary>
+ /// Writes an arithmetic instruction in immediate or register form. The immediate form
+ /// (<paramref name="instI"/>) is used when rm is a constant that is non-zero or when
+ /// <paramref name="immForm"/> is set; otherwise the shifted-register form (<paramref name="instR"/>) is used.
+ /// </summary>
+ private void WriteInstructionAuto(
+     uint instI,
+     uint instR,
+     Operand rd,
+     Operand rn,
+     Operand rm,
+     ArmShiftType shiftType = ArmShiftType.Lsl,
+     int shiftAmount = 0,
+     bool immForm = false)
+ {
+     bool useImmediate = rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm);
+     if (!useImmediate)
+     {
+         uint regWord = instR | (EncodeUImm6(shiftAmount) << 10) | ((uint)shiftType << 22);
+         WriteInstructionRm16Auto(regWord, rd, rn, rm);
+         return;
+     }
+     Debug.Assert(shiftAmount == 0);
+     int value = rm.AsInt32();
+     Debug.Assert((uint)value == rm.Value);
+     uint immWord = instI;
+     // A non-zero value whose low 12 bits are clear is encoded shifted right by 12.
+     if (value != 0 && (value & 0xfff) == 0)
+     {
+         immWord |= 1u << 22; // sh flag
+         value >>= 12;
+     }
+     WriteInstructionAuto(immWord | (EncodeUImm12(value, 0) << 10), rd, rn);
+ }
+ /// <summary>
+ /// Writes an extended-register instruction: shiftAmount (at most 4) goes to bit 10 and
+ /// extensionType to bit 13 before the word is passed to WriteInstructionRm16Auto.
+ /// </summary>
+ private void WriteInstructionAuto(
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     Operand rm,
+     ArmExtensionType extensionType,
+     int shiftAmount = 0)
+ {
+     Debug.Assert((uint)shiftAmount <= 4);
+     uint word = instruction | ((uint)shiftAmount << 10) | ((uint)extensionType << 13);
+     WriteInstructionRm16Auto(word, rd, rn, rm);
+ }
+ /// <summary>
+ /// Writes a logical instruction in bitmask-immediate or register form. A non-zero constant rm is
+ /// encoded through CodeGenCommon.TryEncodeBitMask into <paramref name="instI"/>; anything else uses
+ /// <paramref name="instR"/> with the given shift.
+ /// </summary>
+ private void WriteInstructionBitwiseAuto(
+     uint instI,
+     uint instR,
+     Operand rd,
+     Operand rn,
+     Operand rm,
+     ArmShiftType shiftType = ArmShiftType.Lsl,
+     int shiftAmount = 0)
+ {
+     if (rm.Kind != OperandKind.Constant || rm.Value == 0)
+     {
+         WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount);
+         return;
+     }
+     Debug.Assert(shiftAmount == 0);
+     bool encodable = CodeGenCommon.TryEncodeBitMask(rm, out int n, out int s, out int r);
+     Debug.Assert(encodable);
+     uint word = instI | ((uint)s << 10) | ((uint)r << 16) | ((uint)n << 22);
+     WriteInstructionAuto(word, rd, rn);
+ }
+ /// <summary>
+ /// Writes a shifted-register instruction: adds SfFlag for I64 destinations, the shift amount
+ /// (via EncodeUImm6) at bit 10 and the shift type at bit 22, then calls WriteInstructionRm16.
+ /// </summary>
+ private void WriteInstructionBitwiseAuto(
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     Operand rm,
+     ArmShiftType shiftType = ArmShiftType.Lsl,
+     int shiftAmount = 0)
+ {
+     uint sf = rd.Type == OperandType.I64 ? SfFlag : 0u;
+     uint word = instruction | sf | (EncodeUImm6(shiftAmount) << 10) | ((uint)shiftType << 22);
+     WriteInstructionRm16(word, rd, rn, rm);
+ }
+ /// <summary>
+ /// Writes a register-offset load/store: bit 12 is set when <paramref name="shift"/> is true,
+ /// extensionType goes to bit 13, and bit 30 is set when rd is I64.
+ /// </summary>
+ private void WriteInstructionLdrStrAuto(
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     Operand rm,
+     ArmExtensionType extensionType,
+     bool shift)
+ {
+     uint shiftBit = shift ? 1u << 12 : 0u;
+     uint sizeBit = rd.Type == OperandType.I64 ? 1u << 30 : 0u;
+     uint word = instruction | shiftBit | ((uint)extensionType << 13) | sizeBit;
+     WriteInstructionRm16(word, rd, rn, rm);
+ }
+ // One-register form: ORs in SfFlag when rd is I64, then emits the word.
+ private void WriteInstructionAuto(uint instruction, Operand rd)
+ {
+     uint encoded = rd.Type == OperandType.I64 ? instruction | SfFlag : instruction;
+     WriteInstruction(encoded, rd);
+ }
+ // Two-register form: ORs in SfFlag when rd is I64, then emits the word.
+ public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn)
+ {
+     uint encoded = rd.Type == OperandType.I64 ? instruction | SfFlag : instruction;
+     WriteInstruction(encoded, rd, rn);
+ }
+ // Four-register form (rd, rn, rm, ra): ORs in SfFlag when rd is I64, then emits the word.
+ private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+     uint encoded = rd.Type == OperandType.I64 ? instruction | SfFlag : instruction;
+     WriteInstruction(encoded, rd, rn, rm, ra);
+ }
+ // Emits the instruction word with rd encoded in the lowest bits.
+ public void WriteInstruction(uint instruction, Operand rd)
+ {
+     uint rdBits = EncodeReg(rd);
+     WriteUInt32(instruction | rdBits);
+ }
+ // Emits the instruction word with rd at bit 0 and rn at bit 5.
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn)
+ {
+     uint rdBits = EncodeReg(rd);
+     uint rnBits = EncodeReg(rn) << 5;
+     WriteUInt32(instruction | rdBits | rnBits);
+ }
+ // Emits the instruction word with rd at bit 0, rn at bit 5 and rm at bit 10.
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+     uint rdBits = EncodeReg(rd);
+     uint rnBits = EncodeReg(rn) << 5;
+     uint rmBits = EncodeReg(rm) << 10;
+     WriteUInt32(instruction | rdBits | rnBits | rmBits);
+ }
+ // Emits the instruction word with rd at bit 0, rn at bit 5, ra at bit 10 and rm at bit 16.
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+     uint rdBits = EncodeReg(rd);
+     uint rnBits = EncodeReg(rn) << 5;
+     uint raBits = EncodeReg(ra) << 10;
+     uint rmBits = EncodeReg(rm) << 16;
+     WriteUInt32(instruction | rdBits | rnBits | raBits | rmBits);
+ }
+ // Two-register scalar FP form: sets bit 22 when rd is FP64, then emits rd at bit 0 and rn at bit 5.
+ private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn)
+ {
+     uint typeBit = rd.Type == OperandType.FP64 ? 1u << 22 : 0u;
+     WriteUInt32(instruction | typeBit | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+ // Three-register scalar FP form: sets bit 22 when rd is FP64, then emits with rm at bit 16.
+ private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+     uint typeBit = rd.Type == OperandType.FP64 ? 1u << 22 : 0u;
+     WriteInstructionRm16(instruction | typeBit, rd, rn, rm);
+ }
+ // Three-register SIMD form: sets bit 30 when q is true, then emits with rm at bit 16.
+ private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+     uint qBit = q ? 1u << 30 : 0u;
+     WriteInstructionRm16(instruction | qBit, rd, rn, rm);
+ }
+ // Three-register form with rm at bit 16: ORs in SfFlag when rd is I64 before emitting.
+ private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+     uint encoded = rd.Type == OperandType.I64 ? instruction | SfFlag : instruction;
+     WriteInstructionRm16(encoded, rd, rn, rm);
+ }
+ // Emits the instruction word with rd at bit 0, rn at bit 5 and rm at bit 16.
+ public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+     uint rdBits = EncodeReg(rd);
+     uint rnBits = EncodeReg(rn) << 5;
+     uint rmBits = EncodeReg(rm) << 16;
+     WriteUInt32(instruction | rdBits | rnBits | rmBits);
+ }
+ // Emits the instruction word with rn at bit 5 and rm at bit 16; no destination register is encoded.
+ public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm)
+ {
+     uint rnBits = EncodeReg(rn) << 5;
+     uint rmBits = EncodeReg(rm) << 16;
+     WriteUInt32(instruction | rnBits | rmBits);
+ }
+ // Builds the base word of a load/store pair instruction for the given operand type.
+ // Integer types use intInst (plus SfFlag and a scale of 3 for I64, scale 2 otherwise);
+ // other types use vecInst with opc 0 (FP32), 1 (FP64) or 2 (anything else) at bit 30 and
+ // a scale of 2 + opc. The scaled signed 7-bit immediate is placed at bit 15.
+ private static uint GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type)
+ {
+     uint encoded;
+     int immScale;
+     if (!type.IsInteger())
+     {
+         int opc;
+         switch (type)
+         {
+             case OperandType.FP32:
+                 opc = 0;
+                 break;
+             case OperandType.FP64:
+                 opc = 1;
+                 break;
+             default:
+                 opc = 2;
+                 break;
+         }
+         encoded = vecInst | ((uint)opc << 30);
+         immScale = 2 + opc;
+     }
+     else if (type == OperandType.I64)
+     {
+         encoded = intInst | SfFlag;
+         immScale = 3;
+     }
+     else
+     {
+         encoded = intInst;
+         immScale = 2;
+     }
+     return encoded | (EncodeSImm7(imm, immScale) << 15);
+ }
+ // Picks the base word of a single-register load/store for the given operand type.
+ // Integer: intInst, with bit 30 set for I64. V128: vecInst with bit 23 set.
+ // Other types: vecInst with 2 (FP32) or 3 (anything else) placed at bit 30.
+ private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type)
+ {
+     if (type.IsInteger())
+     {
+         return type == OperandType.I64 ? intInst | (1u << 30) : intInst;
+     }
+     if (type == OperandType.V128)
+     {
+         return vecInst | (1u << 23);
+     }
+     uint sizeBits = type == OperandType.FP32 ? 2u << 30 : 3u << 30;
+     return vecInst | sizeBits;
+ }
+ // Combines an element index and a log2 element size into one field: a single marker bit at
+ // position `size`, with the index stored in the bits above it.
+ private static uint EncodeIndexSizeImm5(int index, int size)
+ {
+     Debug.Assert((uint)size < 4);
+     Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination.");
+     uint sizeMarker = 1u << size;
+     uint indexBits = (uint)index << (size + 1);
+     return indexBits | sizeMarker;
+ }
+ // Encodes value / (1 << scale) as a signed 7-bit field. In debug builds, asserts that
+ // decoding the field reproduces the original value.
+ private static uint EncodeSImm7(int value, int scale)
+ {
+     uint field = (uint)(value >> scale) & 0x7f;
+     int decoded = ((int)field << 25) >> (25 - scale);
+     Debug.Assert(decoded == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+     return field;
+ }
+ // Encodes value as a signed 9-bit field. In debug builds, asserts that sign-extending the
+ // field reproduces the original value.
+ private static uint EncodeSImm9(int value)
+ {
+     uint field = (uint)value & 0x1ff;
+     int decoded = ((int)field << 23) >> 23;
+     Debug.Assert(decoded == value, $"Failed to encode constant 0x{value:X}.");
+     return field;
+ }
+ // Encodes value / 4 as a signed 19-bit field. In debug builds, asserts that decoding the
+ // field reproduces the original value (so value must be a multiple of 4 and in range).
+ private static uint EncodeSImm19_2(int value)
+ {
+     uint field = (uint)(value >> 2) & 0x7ffff;
+     int decoded = ((int)field << 13) >> 11;
+     Debug.Assert(decoded == value, $"Failed to encode constant 0x{value:X}.");
+     return field;
+ }
+ // Encodes value / 4 as a signed 26-bit field. In debug builds, asserts that decoding the
+ // field reproduces the original value (so value must be a multiple of 4 and in range).
+ private static uint EncodeSImm26_2(int value)
+ {
+     uint field = (uint)(value >> 2) & 0x3ffffff;
+     int decoded = ((int)field << 6) >> 4;
+     Debug.Assert(decoded == value, $"Failed to encode constant 0x{value:X}.");
+     return field;
+ }
+ // Encodes value as an unsigned 4-bit field; asserts in debug builds that no bits were lost.
+ private static uint EncodeUImm4(int value)
+ {
+     uint field = (uint)value & 0xf;
+     bool lossless = (int)field == value;
+     Debug.Assert(lossless, $"Failed to encode constant 0x{value:X}.");
+     return field;
+ }
+ // Encodes value as an unsigned 6-bit field; asserts in debug builds that no bits were lost.
+ private static uint EncodeUImm6(int value)
+ {
+     uint field = (uint)value & 0x3f;
+     bool lossless = (int)field == value;
+     Debug.Assert(lossless, $"Failed to encode constant 0x{value:X}.");
+     return field;
+ }
+ // Encodes value as a scaled unsigned 12-bit field, taking the scale from GetScaleForType(type).
+ private static uint EncodeUImm12(int value, OperandType type)
+ {
+     int scale = GetScaleForType(type);
+     return EncodeUImm12(value, scale);
+ }
+ // Encodes value / (1 << scale) as an unsigned 12-bit field. In debug builds, asserts that
+ // scaling the field back up reproduces the original value.
+ private static uint EncodeUImm12(int value, int scale)
+ {
+     uint field = (uint)(value >> scale) & 0xfff;
+     int decoded = (int)field << scale;
+     Debug.Assert(decoded == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+     return field;
+ }
+ // Encodes value as an unsigned 16-bit field; asserts in debug builds that no bits were lost.
+ private static uint EncodeUImm16(int value)
+ {
+     uint field = (uint)value & 0xffff;
+     bool lossless = (int)field == value;
+     Debug.Assert(lossless, $"Failed to encode constant 0x{value:X}.");
+     return field;
+ }
+ // Returns the 5-bit register field for an operand.
+ // A constant operand whose value is 0 is encoded as ZrRegister; any other operand must be
+ // a register with an index below 32.
+ private static uint EncodeReg(Operand reg)
+ {
+     if (reg.Kind == OperandKind.Constant && reg.Value == 0)
+     {
+         return ZrRegister;
+     }
+     // Check the operand kind before reading its register, so that a wrong operand kind is
+     // reported by this assert rather than after GetRegister() has already been called.
+     Debug.Assert(reg.Kind == OperandKind.Register);
+     uint regIndex = (uint)reg.GetRegister().Index;
+     Debug.Assert(regIndex < 32);
+     return regIndex;
+ }
+ // Maps an operand type to the scale used for its immediates:
+ // 2 for I32/FP32, 3 for I64/FP64, 4 for V128. Throws ArgumentException for any other type.
+ public static int GetScaleForType(OperandType type)
+ {
+     switch (type)
+     {
+         case OperandType.I32:
+         case OperandType.FP32:
+             return 2;
+         case OperandType.I64:
+         case OperandType.FP64:
+             return 3;
+         case OperandType.V128:
+             return 4;
+         default:
+             throw new ArgumentException($"Invalid type {type}.");
+     }
+ }
+ // Writes a signed 16-bit value by reinterpreting its bits as unsigned.
+ private void WriteInt16(short value)
+ {
+     ushort bits = (ushort)value;
+     WriteUInt16(bits);
+ }
+ // Writes a signed 32-bit value by reinterpreting its bits as unsigned.
+ private void WriteInt32(int value)
+ {
+     uint bits = (uint)value;
+     WriteUInt32(bits);
+ }
+ // Appends a single byte to the output stream.
+ private void WriteByte(byte value) => _stream.WriteByte(value);
+ // Writes a 16-bit value to the output stream, least significant byte first.
+ private void WriteUInt16(ushort value)
+ {
+     byte low = (byte)value;
+     byte high = (byte)(value >> 8);
+     _stream.WriteByte(low);
+     _stream.WriteByte(high);
+ }
+ // Writes a 32-bit value to the output stream, least significant byte first.
+ private void WriteUInt32(uint value)
+ {
+     for (int shift = 0; shift < 32; shift += 8)
+     {
+         _stream.WriteByte((byte)(value >> shift));
+     }
+ }
+ }
diff --git a/ARMeilleure/CodeGen/Arm64/CallingConvention.cs b/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
new file mode 100644
index 00000000..fda8d786
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
@@ -0,0 +1,96 @@
+using System;
+namespace ARMeilleure.CodeGen.Arm64
+ // Register masks and argument/return register indices used by the ARM64 backend.
+ // In every mask, bit N stands for register N.
+ static class CallingConvention
+ {
+     // One bit for each of the 32 register indices.
+     private const int RegistersMask = unchecked((int)0xffffffff);
+     // Some of those registers have specific roles and can't be used as general purpose registers.
+     // X18 - Reserved for platform specific usage.
+     // X29 - Frame pointer.
+     // X30 - Return address.
+     // X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
+     // CodeGenCommon.ReservedRegister is excluded as well.
+     private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);
+     private const int IntCalleeSavedMask = 0x1ff80000; // X19 to X28
+     private const int FpCalleeSavedMask = 0xff00; // D8 to D15
+     private const int VecCalleeSavedMask = 0;
+     private const int ArgumentRegistersCount = 8;
+     public static int GetIntAvailableRegisters() => RegistersMask & ~ReservedRegsMask;
+     public static int GetVecAvailableRegisters() => RegistersMask;
+     // Caller-saved integer registers: everything that is neither callee-saved nor reserved.
+     public static int GetIntCallerSavedRegisters() => (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
+     public static int GetFpCallerSavedRegisters() => GetFpCalleeSavedRegisters() ^ RegistersMask;
+     public static int GetVecCallerSavedRegisters() => GetVecCalleeSavedRegisters() ^ RegistersMask;
+     public static int GetIntCalleeSavedRegisters() => IntCalleeSavedMask;
+     public static int GetFpCalleeSavedRegisters() => FpCalleeSavedMask;
+     public static int GetVecCalleeSavedRegisters() => VecCalleeSavedMask;
+     public static int GetArgumentsOnRegsCount() => ArgumentRegistersCount;
+     // Returns the register index for the integer argument at the given position.
+     // Throws ArgumentOutOfRangeException when index is outside [0, GetArgumentsOnRegsCount()).
+     public static int GetIntArgumentRegister(int index)
+     {
+         if ((uint)index >= (uint)GetArgumentsOnRegsCount())
+         {
+             throw new ArgumentOutOfRangeException(nameof(index));
+         }
+         return index;
+     }
+     // Returns the register index for the vector argument at the given position.
+     // Throws ArgumentOutOfRangeException when index is outside [0, GetArgumentsOnRegsCount()).
+     public static int GetVecArgumentRegister(int index)
+     {
+         if ((uint)index >= (uint)GetArgumentsOnRegsCount())
+         {
+             throw new ArgumentOutOfRangeException(nameof(index));
+         }
+         return index;
+     }
+     public static int GetIntReturnRegister() => 0;
+     public static int GetIntReturnRegisterHigh() => 1;
+     public static int GetVecReturnRegister() => 0;
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
new file mode 100644
index 00000000..e67d2fdb
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
@@ -0,0 +1,173 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Numerics;
+namespace ARMeilleure.CodeGen.Arm64
+ // Helpers shared by the ARM64 code generator: fixed register indices, immediate range checks
+ // and the logical (bitmask) immediate encoder.
+ static class CodeGenCommon
+ {
+ // NOTE(review): the role of this register is not visible in this file — presumably it holds the tail call target address; confirm against callers.
+ public const int TcAddressRegister = 8;
+ // Register index that CallingConvention removes from the set of allocatable integer registers.
+ public const int ReservedRegister = 17;
+ // True when value is a multiple of (1 << scale) and value >> scale fits in a signed 7-bit field.
+ public static bool ConstFitsOnSImm7(int value, int scale)
+ {
+ return (((value >> scale) << 25) >> (25 - scale)) == value;
+ }
+ // True when value fits in a signed 9-bit field.
+ public static bool ConstFitsOnSImm9(int value)
+ {
+ return ((value << 23) >> 23) == value;
+ }
+ // True when value fits in an unsigned 12-bit field (0 to 4095).
+ public static bool ConstFitsOnUImm12(int value)
+ {
+ return (value & 0xfff) == value;
+ }
+ // True when value is a multiple of the scale for the given type and the scaled value fits
+ // in an unsigned 12-bit field.
+ public static bool ConstFitsOnUImm12(int value, OperandType type)
+ {
+ int scale = Assembler.GetScaleForType(type);
+ return (((value >> scale) & 0xfff) << scale) == value;
+ }
+ // Tries to encode the operand value as a logical (bitmask) immediate.
+ // For I32 operands the value is ORed with itself shifted left by 32 before encoding.
+ // NOTE(review): this assumes the upper 32 bits of an I32 operand value are zero — confirm.
+ public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
+ {
+ ulong value = operand.Value;
+ if (operand.Type == OperandType.I32)
+ {
+ value |= value << 32;
+ }
+ return TryEncodeBitMask(value, out immN, out immS, out immR);
+ }
+ // Tries to express value as a repeated, rotated run of ones and, on success, returns the
+ // immN/immS/immR fields that describe it. Returns false (with all outputs set to 0) when
+ // the value cannot be expressed that way.
+ public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
+ {
+ // Some special values also can't be encoded:
+ // 0 can't be encoded because we need to subtract 1 from onesCount (which would become negative if 0).
+ // A value with all bits set can't be encoded because it is reserved according to the spec, because:
+ // Any value AND all ones will be equal to itself, so it's effectively a no-op.
+ // Any value OR all ones will be equal to all ones, so one can just use MOV.
+ // Any value XOR all ones will be equal to its inverse, so one can just use MVN.
+ if (value == ulong.MaxValue)
+ {
+ immN = 0;
+ immS = 0;
+ immR = 0;
+ return false;
+ }
+ // Length of the lowest run of identical bits, plus the length of the run right above it
+ // (when there is one). This is the smallest candidate pattern length.
+ int bitLength = CountSequence(value);
+ if ((value >> bitLength) != 0)
+ {
+ bitLength += CountSequence(value >> bitLength);
+ }
+ // Round the candidate length up to a power of two.
+ int bitLengthLog2 = BitOperations.Log2((uint)bitLength);
+ int bitLengthPow2 = 1 << bitLengthLog2;
+ if (bitLengthPow2 < bitLength)
+ {
+ bitLengthLog2++;
+ bitLengthPow2 <<= 1;
+ }
+ int selectedESize = 64;
+ int repetitions = 1;
+ int onesCount = BitOperations.PopCount(value);
+ // If there are bits set above the candidate element, look for the smallest element size
+ // (from the candidate up to 32 bits) whose pattern repeats across the whole 64-bit value.
+ if (bitLengthPow2 < 64 && (value >> bitLengthPow2) != 0)
+ {
+ for (int eSizeLog2 = bitLengthLog2; eSizeLog2 < 6; eSizeLog2++)
+ {
+ bool match = true;
+ int eSize = 1 << eSizeLog2;
+ ulong mask = (1UL << eSize) - 1;
+ ulong eValue = value & mask;
+ for (int e = 1; e < 64 / eSize; e++)
+ {
+ if (((value >> (e * eSize)) & mask) != eValue)
+ {
+ match = false;
+ break;
+ }
+ }
+ if (match)
+ {
+ selectedESize = eSize;
+ repetitions = 64 / eSize;
+ onesCount = BitOperations.PopCount(eValue);
+ break;
+ }
+ }
+ }
+ // Find rotation. We have two cases, one where the highest bit is 0
+ // and one where it is 1.
+ // If it's 1, we just need to count the number of 1 bits on the MSB to find the right rotation.
+ // If it's 0, we just need to count the number of 0 bits on the LSB to find the left rotation,
+ // then we can convert it to the right rotation shift by subtracting the value from the element size.
+ int rotation;
+ long vHigh = (long)(value << (64 - selectedESize));
+ if (vHigh < 0)
+ {
+ rotation = BitOperations.LeadingZeroCount(~(ulong)vHigh);
+ }
+ else
+ {
+ rotation = (selectedESize - BitOperations.TrailingZeroCount(value)) & (selectedESize - 1);
+ }
+ // Reconstruct value and see if it matches. If not, we can't encode.
+ ulong reconstructed = onesCount == 64 ? ulong.MaxValue : RotateRight((1UL << onesCount) - 1, rotation, selectedESize);
+ // Replicate the element until it fills all 64 bits.
+ for (int bit = 32; bit >= selectedESize; bit >>= 1)
+ {
+ reconstructed |= reconstructed << bit;
+ }
+ if (reconstructed != value || onesCount == 0)
+ {
+ immN = 0;
+ immS = 0;
+ immR = 0;
+ return false;
+ }
+ immR = rotation;
+ // immN indicates that there are no repetitions.
+ // The MSB of immS indicates the amount of repetitions, and the LSB the number of bits set.
+ if (repetitions == 1)
+ {
+ immN = 1;
+ immS = 0;
+ }
+ else
+ {
+ immN = 0;
+ immS = (0xf80 >> BitOperations.Log2((uint)repetitions)) & 0x3f;
+ }
+ immS |= onesCount - 1;
+ return true;
+ }
+ // Sum of the trailing zero count of value and of its complement. At most one of the two
+ // terms is non-zero, so this is the length of the lowest run of identical bits.
+ private static int CountSequence(ulong value)
+ {
+ return BitOperations.TrailingZeroCount(value) + BitOperations.TrailingZeroCount(~value);
+ }
+ // Rotates bits right by shift positions within an element that is size bits wide.
+ private static ulong RotateRight(ulong bits, int shift, int size)
+ {
+ return (bits >> shift) | ((bits << (size - shift)) & (size == 64 ? ulong.MaxValue : (1UL << size) - 1));
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs b/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
new file mode 100644
index 00000000..1ddde0c1
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
@@ -0,0 +1,286 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.IO;
+namespace ARMeilleure.CodeGen.Arm64
+ // Per-function code generation state: the output stream and its assembler, the positions
+ // of blocks already emitted, branches waiting for their target block, and a pool of
+ // 64-bit constants that is appended after the code and loaded with LDR (literal).
+ class CodeGenContext
+ {
+     // Size in bytes of the placeholder left in the stream for each instruction patched later.
+     private const int BccInstLength = 4;
+     private const int CbnzInstLength = 4;
+     private const int LdrLitInstLength = 4;
+     private readonly Stream _stream;
+     public int StreamOffset => (int)_stream.Length;
+     public AllocationResult AllocResult { get; }
+     public Assembler Assembler { get; }
+     public BasicBlock CurrBlock { get; private set; }
+     public bool HasCall { get; }
+     public int CallArgsRegionSize { get; }
+     public int FpLrSaveRegionSize { get; }
+     // Stream position at which each already entered block starts.
+     private readonly Dictionary<BasicBlock, long> _visitedBlocks;
+     // Branch placeholders whose target block has not been entered yet, keyed by target block.
+     private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;
+     // One slot of the constant pool: its byte offset inside the pool, the symbol used for
+     // relocation, and every load (destination register, stream position) that reads it.
+     private readonly struct ConstantPoolEntry
+     {
+         public readonly int Offset;
+         public readonly Symbol Symbol;
+         public readonly List<(Operand, int)> LdrOffsets;
+         public ConstantPoolEntry(int offset, Symbol symbol)
+         {
+             Offset = offset;
+             Symbol = symbol;
+             LdrOffsets = new List<(Operand, int)>();
+         }
+     }
+     private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;
+     private bool _constantPoolWritten;
+     private long _constantPoolOffset;
+     // State of the single outstanding "near" jump, resolved by JumpHere.
+     private ArmCondition _jNearCondition;
+     private Operand _jNearValue;
+     private long _jNearPosition;
+     private readonly bool _relocatable;
+     // maxCallArgs is negative when the function makes no calls; in that case no call
+     // argument region and no FP/LR save region are reserved.
+     public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
+     {
+         _stream = new MemoryStream();
+         AllocResult = allocResult;
+         Assembler = new Assembler(_stream);
+         bool hasCall = maxCallArgs >= 0;
+         HasCall = hasCall;
+         if (maxCallArgs < 0)
+         {
+             maxCallArgs = 0;
+         }
+         CallArgsRegionSize = maxCallArgs * 16;
+         FpLrSaveRegionSize = hasCall ? 16 : 0;
+         _visitedBlocks = new Dictionary<BasicBlock, long>();
+         _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
+         _constantPool = new Dictionary<ulong, ConstantPoolEntry>();
+         _relocatable = relocatable;
+     }
+     // Marks the current stream position as the start of block and patches every pending
+     // branch that targets it.
+     public void EnterBlock(BasicBlock block)
+     {
+         CurrBlock = block;
+         long target = _stream.Position;
+         if (_pendingBranches.TryGetValue(block, out var list))
+         {
+             foreach (var tuple in list)
+             {
+                 _stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
+                 WriteBranch(tuple.Condition, target);
+             }
+             // Return to where emission was before patching.
+             _stream.Seek(target, SeekOrigin.Begin);
+             _pendingBranches.Remove(block);
+         }
+         _visitedBlocks.Add(block, target);
+     }
+     // Emits an unconditional branch to target.
+     public void JumpTo(BasicBlock target)
+     {
+         JumpTo(ArmCondition.Al, target);
+     }
+     // Emits a branch to target. If the target has not been entered yet, a placeholder is
+     // left in the stream and patched by EnterBlock.
+     public void JumpTo(ArmCondition condition, BasicBlock target)
+     {
+         if (_visitedBlocks.TryGetValue(target, out long offset))
+         {
+             WriteBranch(condition, offset);
+         }
+         else
+         {
+             if (!_pendingBranches.TryGetValue(target, out var list))
+             {
+                 list = new List<(ArmCondition, long)>();
+                 _pendingBranches.Add(target, list);
+             }
+             list.Add((condition, _stream.Position));
+             _stream.Seek(BccInstLength, SeekOrigin.Current);
+         }
+     }
+     // Writes a branch at the current stream position whose offset is relative to that position.
+     private void WriteBranch(ArmCondition condition, long to)
+     {
+         int imm = checked((int)(to - _stream.Position));
+         if (condition != ArmCondition.Al)
+         {
+             Assembler.B(condition, imm);
+         }
+         else
+         {
+             Assembler.B(imm);
+         }
+     }
+     // Reserves a conditional branch placeholder that JumpHere will later point at its own position.
+     public void JumpToNear(ArmCondition condition)
+     {
+         _jNearCondition = condition;
+         _jNearPosition = _stream.Position;
+         _stream.Seek(BccInstLength, SeekOrigin.Current);
+     }
+     // Reserves a CBNZ placeholder on value that JumpHere will later point at its own position.
+     public void JumpToNearIfNotZero(Operand value)
+     {
+         _jNearValue = value;
+         _jNearPosition = _stream.Position;
+         _stream.Seek(CbnzInstLength, SeekOrigin.Current);
+     }
+     // Patches the placeholder reserved by JumpToNear/JumpToNearIfNotZero so that it jumps to
+     // the current stream position, then restores the stream position.
+     public void JumpHere()
+     {
+         long currentPosition = _stream.Position;
+         long offset = currentPosition - _jNearPosition;
+         _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+         if (_jNearValue != default)
+         {
+             Assembler.Cbnz(_jNearValue, checked((int)offset));
+             _jNearValue = default;
+         }
+         else
+         {
+             Assembler.B(_jNearCondition, checked((int)offset));
+         }
+         _stream.Seek(currentPosition, SeekOrigin.Begin);
+     }
+     // Registers value in the constant pool (one slot per distinct value) and leaves a
+     // placeholder for the load into rt, which is written once the pool position is known.
+     // NOTE(review): entries are keyed by value only, so the symbol of the first reservation
+     // of a value is the one kept — confirm callers never reuse a value with another symbol.
+     public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
+     {
+         if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
+         {
+             cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
+             _constantPool.Add(value, cpe);
+         }
+         cpe.LdrOffsets.Add((rt, (int)_stream.Position));
+         _stream.Seek(LdrLitInstLength, SeekOrigin.Current);
+     }
+     // Writes the constant pool at the current stream position, patches every reserved load
+     // to point at its slot, and returns the pool base offset. Subsequent calls only return
+     // the offset computed by the first call.
+     private long WriteConstantPool()
+     {
+         if (_constantPoolWritten)
+         {
+             return _constantPoolOffset;
+         }
+         long constantPoolBaseOffset = _stream.Position;
+         foreach (KeyValuePair<ulong, ConstantPoolEntry> pair in _constantPool)
+         {
+             ConstantPoolEntry cpe = pair.Value;
+             long entryOffset = constantPoolBaseOffset + cpe.Offset;
+             // Write each value at the offset recorded when it was reserved, instead of
+             // relying on the dictionary enumerating keys in insertion order (the
+             // enumeration order of Dictionary is not specified).
+             _stream.Seek(entryOffset, SeekOrigin.Begin);
+             WriteUInt64(pair.Key);
+             int absoluteOffset = checked((int)entryOffset);
+             foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
+             {
+                 _stream.Seek(ldrOffset, SeekOrigin.Begin);
+                 int pcRelativeOffset = absoluteOffset - ldrOffset;
+                 Assembler.LdrLit(rt, pcRelativeOffset);
+             }
+         }
+         // Leave the stream positioned right after the pool.
+         _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);
+         _constantPoolOffset = constantPoolBaseOffset;
+         _constantPoolWritten = true;
+         return constantPoolBaseOffset;
+     }
+     // Finalizes the constant pool and returns a copy of the generated bytes together with
+     // the relocation entries (one per pool slot whose symbol type is not None; empty when
+     // the context is not relocatable).
+     public (byte[], RelocInfo) GetCode()
+     {
+         long constantPoolBaseOffset = WriteConstantPool();
+         byte[] code = new byte[_stream.Length];
+         long originalPosition = _stream.Position;
+         _stream.Seek(0, SeekOrigin.Begin);
+         _stream.Read(code, 0, code.Length);
+         _stream.Seek(originalPosition, SeekOrigin.Begin);
+         RelocInfo relocInfo;
+         if (_relocatable)
+         {
+             RelocEntry[] relocs = new RelocEntry[_constantPool.Count];
+             int index = 0;
+             foreach (ConstantPoolEntry cpe in _constantPool.Values)
+             {
+                 if (cpe.Symbol.Type != SymbolType.None)
+                 {
+                     int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+                     relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
+                 }
+             }
+             if (index != relocs.Length)
+             {
+                 Array.Resize(ref relocs, index);
+             }
+             relocInfo = new RelocInfo(relocs);
+         }
+         else
+         {
+             relocInfo = new RelocInfo(Array.Empty<RelocEntry>());
+         }
+         return (code, relocInfo);
+     }
+     // Writes a 64-bit value to the stream, least significant byte first.
+     private void WriteUInt64(ulong value)
+     {
+         _stream.WriteByte((byte)(value >> 0));
+         _stream.WriteByte((byte)(value >> 8));
+         _stream.WriteByte((byte)(value >> 16));
+         _stream.WriteByte((byte)(value >> 24));
+         _stream.WriteByte((byte)(value >> 32));
+         _stream.WriteByte((byte)(value >> 40));
+         _stream.WriteByte((byte)(value >> 48));
+         _stream.WriteByte((byte)(value >> 56));
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
new file mode 100644
index 00000000..704aa45a
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
@@ -0,0 +1,1576 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+using static ARMeilleure.IntermediateRepresentation.Operand;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+namespace ARMeilleure.CodeGen.Arm64
+ static class CodeGenerator
+ {
+ private const int DWordScale = 3;
+ private const int RegistersCount = 32;
+ private const int FpRegister = 29;
+ private const int LrRegister = 30;
+ private const int SpRegister = 31;
+ private const int ZrRegister = 31;
+ // Access width selector passed to GenerateAtomicCas.
+ private enum AccessSize
+ {
+ // Used by GenerateCompareAndSwap8.
+ Byte,
+ // Used by GenerateCompareAndSwap16.
+ Hword,
+ // Used by GenerateCompareAndSwap; presumably the width then follows the operand type — confirm in GenerateAtomicCas.
+ Auto
+ }
+ private static Action<CodeGenContext, Operation>[] _instTable;
+ // Builds the dispatch table that maps each IR instruction to its generator method.
+ // Instructions without an entry stay null and are rejected by GenerateOperation.
+ static CodeGenerator()
+ {
+     static void SetHandler(Instruction instruction, Action<CodeGenContext, Operation> generator)
+     {
+         _instTable[(int)instruction] = generator;
+     }
+     _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+     SetHandler(Instruction.Add, GenerateAdd);
+     SetHandler(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+     SetHandler(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+     SetHandler(Instruction.BitwiseNot, GenerateBitwiseNot);
+     SetHandler(Instruction.BitwiseOr, GenerateBitwiseOr);
+     SetHandler(Instruction.BranchIf, GenerateBranchIf);
+     SetHandler(Instruction.ByteSwap, GenerateByteSwap);
+     SetHandler(Instruction.Call, GenerateCall);
+     // Instruction.Clobber has no handler registered (its registration is disabled).
+     SetHandler(Instruction.Compare, GenerateCompare);
+     SetHandler(Instruction.CompareAndSwap, GenerateCompareAndSwap);
+     SetHandler(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
+     SetHandler(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
+     SetHandler(Instruction.ConditionalSelect, GenerateConditionalSelect);
+     SetHandler(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+     SetHandler(Instruction.ConvertToFP, GenerateConvertToFP);
+     SetHandler(Instruction.ConvertToFPUI, GenerateConvertToFPUI);
+     SetHandler(Instruction.Copy, GenerateCopy);
+     SetHandler(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+     SetHandler(Instruction.Divide, GenerateDivide);
+     SetHandler(Instruction.DivideUI, GenerateDivideUI);
+     SetHandler(Instruction.Fill, GenerateFill);
+     SetHandler(Instruction.Load, GenerateLoad);
+     SetHandler(Instruction.Load16, GenerateLoad16);
+     SetHandler(Instruction.Load8, GenerateLoad8);
+     SetHandler(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+     SetHandler(Instruction.Multiply, GenerateMultiply);
+     SetHandler(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+     SetHandler(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+     SetHandler(Instruction.Negate, GenerateNegate);
+     SetHandler(Instruction.Return, GenerateReturn);
+     SetHandler(Instruction.RotateRight, GenerateRotateRight);
+     SetHandler(Instruction.ShiftLeft, GenerateShiftLeft);
+     SetHandler(Instruction.ShiftRightSI, GenerateShiftRightSI);
+     SetHandler(Instruction.ShiftRightUI, GenerateShiftRightUI);
+     SetHandler(Instruction.SignExtend16, GenerateSignExtend16);
+     SetHandler(Instruction.SignExtend32, GenerateSignExtend32);
+     SetHandler(Instruction.SignExtend8, GenerateSignExtend8);
+     SetHandler(Instruction.Spill, GenerateSpill);
+     SetHandler(Instruction.SpillArg, GenerateSpillArg);
+     SetHandler(Instruction.StackAlloc, GenerateStackAlloc);
+     SetHandler(Instruction.Store, GenerateStore);
+     SetHandler(Instruction.Store16, GenerateStore16);
+     SetHandler(Instruction.Store8, GenerateStore8);
+     SetHandler(Instruction.Subtract, GenerateSubtract);
+     SetHandler(Instruction.Tailcall, GenerateTailcall);
+     SetHandler(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+     SetHandler(Instruction.VectorExtract, GenerateVectorExtract);
+     SetHandler(Instruction.VectorExtract16, GenerateVectorExtract16);
+     SetHandler(Instruction.VectorExtract8, GenerateVectorExtract8);
+     SetHandler(Instruction.VectorInsert, GenerateVectorInsert);
+     SetHandler(Instruction.VectorInsert16, GenerateVectorInsert16);
+     SetHandler(Instruction.VectorInsert8, GenerateVectorInsert8);
+     SetHandler(Instruction.VectorOne, GenerateVectorOne);
+     SetHandler(Instruction.VectorZero, GenerateVectorZero);
+     SetHandler(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+     SetHandler(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+     SetHandler(Instruction.ZeroExtend16, GenerateZeroExtend16);
+     SetHandler(Instruction.ZeroExtend32, GenerateZeroExtend32);
+     SetHandler(Instruction.ZeroExtend8, GenerateZeroExtend8);
+ }
+ // Compiles the control flow graph held by cctx into ARM64 machine code.
+ // Runs, in order: the optimization passes, pre-allocation, register allocation and code
+ // emission, and returns the code bytes together with unwind and relocation information.
+ public static CompiledFunction Generate(CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+ Logger.StartPass(PassName.Optimization);
+ // The generic optimizer only runs on SSA form; block placement runs whenever
+ // optimizations are enabled; the ARM64 specific pass always runs.
+ if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+ {
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Optimizer.RunPass(cfg);
+ }
+ BlockPlacement.RunPass(cfg);
+ }
+ Arm64Optimizer.RunPass(cfg);
+ Logger.EndPass(PassName.Optimization, cfg);
+ Logger.StartPass(PassName.PreAllocation);
+ StackAllocator stackAlloc = new();
+ PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+ Logger.EndPass(PassName.PreAllocation, cfg);
+ Logger.StartPass(PassName.RegisterAllocation);
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Ssa.Deconstruct(cfg);
+ }
+ // Pick the register allocator requested by the compiler options.
+ IRegisterAllocator regAlloc;
+ if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+ {
+ regAlloc = new LinearScanAllocator();
+ }
+ else
+ {
+ regAlloc = new HybridAllocator();
+ }
+ RegisterMasks regMasks = new(
+ CallingConvention.GetIntAvailableRegisters(),
+ CallingConvention.GetVecAvailableRegisters(),
+ CallingConvention.GetIntCallerSavedRegisters(),
+ CallingConvention.GetVecCallerSavedRegisters(),
+ CallingConvention.GetIntCalleeSavedRegisters(),
+ CallingConvention.GetVecCalleeSavedRegisters(),
+ RegistersCount);
+ AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+ Logger.EndPass(PassName.RegisterAllocation, cfg);
+ Logger.StartPass(PassName.CodeGeneration);
+ //Console.Error.WriteLine(IRDumper.GetDump(cfg));
+ bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+ CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
+ UnwindInfo unwindInfo = WritePrologue(context);
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ context.EnterBlock(block);
+ // GenerateOperation returns the next operation to process, which lets it skip an
+ // operation that was merged into the current one.
+ for (Operation node = block.Operations.First; node != default;)
+ {
+ node = GenerateOperation(context, node);
+ }
+ if (block.SuccessorsCount == 0)
+ {
+ // The only blocks which can have 0 successors are exit blocks.
+ Operation last = block.Operations.Last;
+ Debug.Assert(last.Instruction == Instruction.Tailcall ||
+ last.Instruction == Instruction.Return);
+ }
+ else
+ {
+ // Only emit a jump when the first successor is not the block emitted next.
+ BasicBlock succ = block.GetSuccessor(0);
+ if (succ != block.ListNext)
+ {
+ context.JumpTo(succ);
+ }
+ }
+ }
+ (byte[] code, RelocInfo relocInfo) = context.GetCode();
+ Logger.EndPass(PassName.CodeGeneration);
+ return new CompiledFunction(code, unwindInfo, relocInfo);
+ }
+ // Generates code for one operation and returns the next operation to process.
+ // Extended operations go to CodeGeneratorIntrinsic. A load/store followed by an operation
+ // with the same instruction is first offered to TryPairMemoryOp; when pairing succeeds
+ // both operations are consumed. Everything else is dispatched through _instTable.
+ // Throws ArgumentException when the instruction has no registered generator.
+ private static Operation GenerateOperation(CodeGenContext context, Operation operation)
+ {
+     if (operation.Instruction == Instruction.Extended)
+     {
+         CodeGeneratorIntrinsic.GenerateOperation(context, operation);
+         return operation.ListNext;
+     }
+     if (IsLoadOrStore(operation) &&
+         operation.ListNext != default &&
+         operation.ListNext.Instruction == operation.Instruction &&
+         TryPairMemoryOp(context, operation, operation.ListNext))
+     {
+         // Skip next operation if we managed to pair them.
+         return operation.ListNext.ListNext;
+     }
+     Action<CodeGenContext, Operation> handler = _instTable[(int)operation.Instruction];
+     if (handler == null)
+     {
+         throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+     }
+     handler(context, operation);
+     return operation.ListNext;
+ }
+ // Lowers an Add operation: Assembler.Add for integer destinations, FaddScalar otherwise.
+ private static void GenerateAdd(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+     // ValidateBinOp(dest, src1, src2);
+     if (!result.Type.IsInteger())
+     {
+         context.Assembler.FaddScalar(result, lhs, rhs);
+         return;
+     }
+     context.Assembler.Add(result, lhs, rhs);
+ }
+ // Lowers a BitwiseAnd operation (integer only) to Assembler.And.
+ private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+     ValidateBinOp(result, lhs, rhs);
+     Debug.Assert(result.Type.IsInteger());
+     context.Assembler.And(result, lhs, rhs);
+ }
+ // Lowers a BitwiseExclusiveOr operation: Eor for integer destinations, EorVector otherwise.
+ private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+     ValidateBinOp(result, lhs, rhs);
+     if (!result.Type.IsInteger())
+     {
+         context.Assembler.EorVector(result, lhs, rhs);
+         return;
+     }
+     context.Assembler.Eor(result, lhs, rhs);
+ }
+ // Lowers a BitwiseNot operation (integer only) to Assembler.Mvn.
+ private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand input = operation.GetSource(0);
+     ValidateUnOp(result, input);
+     Debug.Assert(result.Type.IsInteger());
+     context.Assembler.Mvn(result, input);
+ }
+ // Lowers a BitwiseOr operation (integer only) to Assembler.Orr.
+ private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+     ValidateBinOp(result, lhs, rhs);
+     Debug.Assert(result.Type.IsInteger());
+     context.Assembler.Orr(result, lhs, rhs);
+ }
+ // Lowers a BranchIf operation: compares sources 0 and 1, then branches to the current
+ // block's successor 1 under the condition given by the constant in source 2.
+ private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+ {
+     Operand comparison = operation.GetSource(2);
+     Debug.Assert(comparison.Kind == OperandKind.Constant);
+     var condition = ((Comparison)comparison.AsInt32()).ToArmCondition();
+     GenerateCompareCommon(context, operation);
+     BasicBlock takenTarget = context.CurrBlock.GetSuccessor(1);
+     context.JumpTo(condition, takenTarget);
+ }
+ // Lowers a ByteSwap operation (integer only) to Assembler.Rev.
+ private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand input = operation.GetSource(0);
+     ValidateUnOp(result, input);
+     Debug.Assert(result.Type.IsInteger());
+     context.Assembler.Rev(result, input);
+ }
+ // Lowers a Call operation to Assembler.Blr on source 0.
+ private static void GenerateCall(CodeGenContext context, Operation operation)
+ {
+     Operand callee = operation.GetSource(0);
+     context.Assembler.Blr(callee);
+ }
+ // Lowers a Compare operation: compares sources 0 and 1, then writes the result of the
+ // condition given by the constant in source 2 into the I32 destination using Cset.
+ private static void GenerateCompare(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand comparison = operation.GetSource(2);
+     Debug.Assert(result.Type == OperandType.I32);
+     Debug.Assert(comparison.Kind == OperandKind.Constant);
+     var condition = ((Comparison)comparison.AsInt32()).ToArmCondition();
+     GenerateCompareCommon(context, operation);
+     context.Assembler.Cset(result, condition);
+ }
+ // Lowers a CompareAndSwap operation. An operation with 5 sources is the 128-bit variant
+ // and goes to GenerateAtomicDcas; otherwise GenerateAtomicCas is used with AccessSize.Auto.
+ private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
+ {
+     // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+     bool is128Bit = operation.SourcesCount == 5;
+     if (!is128Bit)
+     {
+         Operand actual = operation.GetDestination(0);
+         Operand result = operation.GetDestination(1);
+         Operand address = operation.GetSource(0);
+         Operand expected = operation.GetSource(1);
+         Operand desired = operation.GetSource(2);
+         GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto);
+         return;
+     }
+     Operand actualLow = operation.GetDestination(0);
+     Operand actualHigh = operation.GetDestination(1);
+     Operand temp0 = operation.GetDestination(2);
+     Operand temp1 = operation.GetDestination(3);
+     Operand wideAddress = operation.GetSource(0);
+     Operand expectedLow = operation.GetSource(1);
+     Operand expectedHigh = operation.GetSource(2);
+     Operand desiredLow = operation.GetSource(3);
+     Operand desiredHigh = operation.GetSource(4);
+     GenerateAtomicDcas(
+         context,
+         wideAddress,
+         expectedLow,
+         expectedHigh,
+         desiredLow,
+         desiredHigh,
+         actualLow,
+         actualHigh,
+         temp0,
+         temp1);
+ }
+ // Half-word compare-and-swap: forwards address/expected/desired (sources 0..2) and
+ // actual/result (destinations 0..1) to GenerateAtomicCas with AccessSize.Hword.
+ private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+ {
+     GenerateAtomicCas(
+         context,
+         operation.GetSource(0),
+         operation.GetSource(1),
+         operation.GetSource(2),
+         operation.GetDestination(0),
+         operation.GetDestination(1),
+         AccessSize.Hword);
+ }
+ // Byte compare-and-swap: forwards address/expected/desired (sources 0..2) and
+ // actual/result (destinations 0..1) to GenerateAtomicCas with AccessSize.Byte.
+ private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+ {
+     GenerateAtomicCas(
+         context,
+         operation.GetSource(0),
+         operation.GetSource(1),
+         operation.GetSource(2),
+         operation.GetDestination(0),
+         operation.GetDestination(1),
+         AccessSize.Byte);
+ }
+ // Shared by the compare and branch generators: emits Cmp on sources 0 and 1, which must be
+ // integers of the same type.
+ private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+ {
+     Operand left = operation.GetSource(0);
+     Operand right = operation.GetSource(1);
+     EnsureSameType(left, right);
+     Debug.Assert(left.Type.IsInteger());
+     context.Assembler.Cmp(left, right);
+ }
+ // Compares the I32 condition (source 0) against zero, then emits Csel with the Ne condition
+ // over sources 1 and 2. Destination and both selectable sources share one integer type.
+ private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+ {
+     Operand condition = operation.GetSource(0);
+     Operand first = operation.GetSource(1);
+     Operand second = operation.GetSource(2);
+     Operand output = operation.Destination;
+     EnsureSameType(output, first, second);
+     Debug.Assert(output.Type.IsInteger());
+     Debug.Assert(condition.Type == OperandType.I32);
+     Operand zero = Const(condition.Type, 0);
+     context.Assembler.Cmp(condition, zero);
+     context.Assembler.Csel(output, first, second, ArmCondition.Ne);
+ }
+ // Narrows an I64 source to an I32 destination by moving from the I32 view of the source register.
+ private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+ {
+     Operand wide = operation.GetSource(0);
+     Operand narrow = operation.Destination;
+     Debug.Assert(narrow.Type == OperandType.I32 && wide.Type == OperandType.I64);
+     Operand wideAsI32 = Register(wide, OperandType.I32);
+     context.Assembler.Mov(narrow, wideAsI32);
+ }
+ // Converts source 0 to the FP32/FP64 destination: ScvtfScalar for integer sources,
+ // FcvtScalar otherwise. V128 sources and same-type conversions are not expected.
+ private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type == OperandType.FP32 || output.Type == OperandType.FP64);
+     Debug.Assert(output.Type != input.Type);
+     Debug.Assert(input.Type != OperandType.V128);
+     if (!input.Type.IsInteger())
+     {
+         context.Assembler.FcvtScalar(output, input);
+         return;
+     }
+     context.Assembler.ScvtfScalar(output, input);
+ }
+ // Converts an integer source to the FP32/FP64 destination using UcvtfScalar.
+ private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type == OperandType.FP32 || output.Type == OperandType.FP64);
+     Debug.Assert(output.Type != input.Type);
+     Debug.Assert(input.Type.IsInteger());
+     context.Assembler.UcvtfScalar(output, input);
+ }
+ // Copies source 0 into the destination. Nothing is emitted when both operands have the same
+ // kind and value. Constants going into a register are either reserved as relocatable
+ // constants or built with GenerateConstantCopy; everything else is a plain Mov.
+ private static void GenerateCopy(CodeGenContext context, Operation operation)
+ {
+     Operand from = operation.GetSource(0);
+     Operand to = operation.Destination;
+     EnsureSameType(to, from);
+     // Constants are only supported for integer destinations.
+     Debug.Assert(to.Type.IsInteger() || from.Kind != OperandKind.Constant);
+     bool sameOperand = to.Kind == from.Kind && to.Value == from.Value;
+     if (sameOperand)
+     {
+         // A move onto itself would be a no-op.
+         return;
+     }
+     bool constantIntoRegister = to.Kind == OperandKind.Register && from.Kind == OperandKind.Constant;
+     if (!constantIntoRegister)
+     {
+         context.Assembler.Mov(to, from);
+         return;
+     }
+     if (from.Relocatable)
+     {
+         context.ReserveRelocatableConstant(to, from.Symbol, from.Value);
+         return;
+     }
+     GenerateConstantCopy(context, to, from.Value);
+ }
+ // Emits Clz from source 0 into the destination; both must share the same integer type.
+ private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     EnsureSameType(output, input);
+     Debug.Assert(output.Type.IsInteger());
+     context.Assembler.Clz(output, input);
+ }
+ // Divides source 0 by source 1: Sdiv for integer destinations, FdivScalar otherwise.
+ private static void GenerateDivide(CodeGenContext context, Operation operation)
+ {
+     Operand numerator = operation.GetSource(0);
+     Operand denominator = operation.GetSource(1);
+     Operand quotient = operation.Destination;
+     ValidateBinOp(quotient, numerator, denominator);
+     if (!quotient.Type.IsInteger())
+     {
+         context.Assembler.FdivScalar(quotient, numerator, denominator);
+         return;
+     }
+     context.Assembler.Sdiv(quotient, numerator, denominator);
+ }
+ // Emits Udiv of source 0 by source 1 into the destination.
+ private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+ {
+     Operand numerator = operation.GetSource(0);
+     Operand denominator = operation.GetSource(1);
+     Operand quotient = operation.Destination;
+     ValidateBinOp(quotient, numerator, denominator);
+     context.Assembler.Udiv(quotient, numerator, denominator);
+ }
+ // Emits Ldr of the address in source 0 into the destination.
+ private static void GenerateLoad(CodeGenContext context, Operation operation)
+ {
+     Operand from = operation.GetSource(0);
+     Operand into = operation.Destination;
+     context.Assembler.Ldr(into, from);
+ }
+ // Emits LdrhRiUn with a zero offset from the address in source 0 into an integer destination.
+ private static void GenerateLoad16(CodeGenContext context, Operation operation)
+ {
+     Operand from = operation.GetSource(0);
+     Operand into = operation.Destination;
+     Debug.Assert(into.Type.IsInteger());
+     context.Assembler.LdrhRiUn(into, from, 0);
+ }
+ // Emits LdrbRiUn with a zero offset from the address in source 0 into an integer destination.
+ private static void GenerateLoad8(CodeGenContext context, Operation operation)
+ {
+     Operand from = operation.GetSource(0);
+     Operand into = operation.Destination;
+     Debug.Assert(into.Type.IsInteger());
+     context.Assembler.LdrbRiUn(into, from, 0);
+ }
+ // Emits Dmb with option 0xf.
+ // NOTE(review): 0xf is presumably the full-system (SY) barrier option - confirm against the assembler.
+ private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation)
+     => context.Assembler.Dmb(0xf);
+ // Multiplies sources 0 and 1: Mul for integer destinations, FmulScalar otherwise.
+ // All three operands must have the same type.
+ private static void GenerateMultiply(CodeGenContext context, Operation operation)
+ {
+     Operand left = operation.GetSource(0);
+     Operand right = operation.GetSource(1);
+     Operand product = operation.Destination;
+     EnsureSameType(product, left, right);
+     if (!product.Type.IsInteger())
+     {
+         context.Assembler.FmulScalar(product, left, right);
+         return;
+     }
+     context.Assembler.Mul(product, left, right);
+ }
+ // Emits Smulh on two I64 sources into an I64 destination.
+ private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+ {
+     Operand left = operation.GetSource(0);
+     Operand right = operation.GetSource(1);
+     Operand high = operation.Destination;
+     EnsureSameType(high, left, right);
+     Debug.Assert(high.Type == OperandType.I64);
+     context.Assembler.Smulh(high, left, right);
+ }
+ // Emits Umulh on two I64 sources into an I64 destination.
+ private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+ {
+     Operand left = operation.GetSource(0);
+     Operand right = operation.GetSource(1);
+     Operand high = operation.Destination;
+     EnsureSameType(high, left, right);
+     Debug.Assert(high.Type == OperandType.I64);
+     context.Assembler.Umulh(high, left, right);
+ }
+ // Negates source 0: Neg for integer destinations, FnegScalar otherwise.
+ private static void GenerateNegate(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     ValidateUnOp(output, input);
+     if (!output.Type.IsInteger())
+     {
+         context.Assembler.FnegScalar(output, input);
+         return;
+     }
+     context.Assembler.Neg(output, input);
+ }
+ // Loads `value` from `address + offset`, picking the first form the offset fits in:
+ // LdrRiUn (ConstFitsOnUImm12), Ldur (ConstFitsOnSImm9), or an address computed into the
+ // reserved register followed by a zero-offset LdrRiUn.
+ private static void GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset)
+ {
+     if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+     {
+         context.Assembler.LdrRiUn(value, address, offset);
+         return;
+     }
+     if (CodeGenCommon.ConstFitsOnSImm9(offset))
+     {
+         context.Assembler.Ldur(value, address, offset);
+         return;
+     }
+     Operand scratch = Register(CodeGenCommon.ReservedRegister);
+     GenerateConstantCopy(context, scratch, (ulong)offset);
+     // Address might be SP and must be the first input.
+     context.Assembler.Add(scratch, address, scratch, ArmExtensionType.Uxtx);
+     context.Assembler.LdrRiUn(value, scratch, 0);
+ }
+ // Writes the function epilogue, then emits Ret through the link register.
+ private static void GenerateReturn(CodeGenContext context, Operation operation)
+ {
+     WriteEpilogue(context);
+     Operand linkRegister = Register(LrRegister);
+     context.Assembler.Ret(linkRegister);
+ }
+ // Emits Ror of source 0 by the amount in source 1.
+ private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+ {
+     Operand value = operation.GetSource(0);
+     Operand amount = operation.GetSource(1);
+     Operand output = operation.Destination;
+     ValidateShift(output, value, amount);
+     context.Assembler.Ror(output, value, amount);
+ }
+ // Emits Lsl of source 0 by the amount in source 1.
+ private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+ {
+     Operand value = operation.GetSource(0);
+     Operand amount = operation.GetSource(1);
+     Operand output = operation.Destination;
+     ValidateShift(output, value, amount);
+     context.Assembler.Lsl(output, value, amount);
+ }
+ // Emits Asr of source 0 by the amount in source 1.
+ private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+ {
+     Operand value = operation.GetSource(0);
+     Operand amount = operation.GetSource(1);
+     Operand output = operation.Destination;
+     ValidateShift(output, value, amount);
+     context.Assembler.Asr(output, value, amount);
+ }
+ // Emits Lsr of source 0 by the amount in source 1.
+ private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+ {
+     Operand value = operation.GetSource(0);
+     Operand amount = operation.GetSource(1);
+     Operand output = operation.Destination;
+     ValidateShift(output, value, amount);
+     context.Assembler.Lsr(output, value, amount);
+ }
+ // Emits Sxth from source 0 into the destination; both operands must be integers.
+ private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type.IsInteger() && input.Type.IsInteger());
+     context.Assembler.Sxth(output, input);
+ }
+ // Emits Sxtw from source 0 into the destination; both operands must be integers.
+ private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type.IsInteger() && input.Type.IsInteger());
+     context.Assembler.Sxtw(output, input);
+ }
+ // Emits Sxtb from source 0 into the destination; both operands must be integers.
+ private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type.IsInteger() && input.Type.IsInteger());
+     context.Assembler.Sxtb(output, input);
+ }
+ // Reloads the destination from an SP-relative slot. The constant slot offset in source 0 is
+ // biased by the call-arguments region and the FP/LR save region sizes.
+ private static void GenerateFill(CodeGenContext context, Operation operation)
+ {
+     Operand slot = operation.GetSource(0);
+     Operand target = operation.Destination;
+     Debug.Assert(slot.Kind == OperandKind.Constant);
+     int stackOffset = slot.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+     GenerateLoad(context, target, Register(SpRegister), stackOffset);
+ }
+ // Stores `value` to `address + offset`, picking the first form the offset fits in:
+ // StrRiUn (ConstFitsOnUImm12), Stur (ConstFitsOnSImm9), or an address computed into the
+ // reserved register followed by a zero-offset StrRiUn.
+ private static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset)
+ {
+     if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+     {
+         context.Assembler.StrRiUn(value, address, offset);
+         return;
+     }
+     if (CodeGenCommon.ConstFitsOnSImm9(offset))
+     {
+         context.Assembler.Stur(value, address, offset);
+         return;
+     }
+     Operand scratch = Register(CodeGenCommon.ReservedRegister);
+     GenerateConstantCopy(context, scratch, (ulong)offset);
+     // Address might be SP and must be the first input.
+     context.Assembler.Add(scratch, address, scratch, ArmExtensionType.Uxtx);
+     context.Assembler.StrRiUn(value, scratch, 0);
+ }
+ // Spills into the region that follows the call-arguments and FP/LR save regions.
+ private static void GenerateSpill(CodeGenContext context, Operation operation)
+ {
+     int spillBase = context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+     GenerateSpill(context, operation, spillBase);
+ }
+ // Spills relative to SP with no additional base offset.
+ private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+     => GenerateSpill(context, operation, 0);
+ // Computes SP plus the constant offset in source 0 (biased by the call-arguments and FP/LR
+ // save region sizes) into the destination.
+ private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+ {
+     Operand slot = operation.GetSource(0);
+     Operand target = operation.Destination;
+     Debug.Assert(slot.Kind == OperandKind.Constant);
+     int stackOffset = slot.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+     Operand displacement = Const(target.Type, stackOffset);
+     context.Assembler.Add(target, Register(SpRegister), displacement);
+ }
+ // Emits Str of the value in source 1 to the address in source 0.
+ private static void GenerateStore(CodeGenContext context, Operation operation)
+ {
+     Operand target = operation.GetSource(0);
+     Operand data = operation.GetSource(1);
+     context.Assembler.Str(data, target);
+ }
+ // Emits StrhRiUn with a zero offset, writing the integer value in source 1 to the address in source 0.
+ private static void GenerateStore16(CodeGenContext context, Operation operation)
+ {
+     Operand target = operation.GetSource(0);
+     Operand data = operation.GetSource(1);
+     Debug.Assert(data.Type.IsInteger());
+     context.Assembler.StrhRiUn(data, target, 0);
+ }
+ // Emits StrbRiUn with a zero offset, writing the integer value in source 1 to the address in source 0.
+ private static void GenerateStore8(CodeGenContext context, Operation operation)
+ {
+     Operand target = operation.GetSource(0);
+     Operand data = operation.GetSource(1);
+     Debug.Assert(data.Type.IsInteger());
+     context.Assembler.StrbRiUn(data, target, 0);
+ }
+ // Stores source 1 to the SP-relative slot whose constant offset is source 0 plus `baseOffset`.
+ private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+ {
+     Operand slot = operation.GetSource(0);
+     Operand data = operation.GetSource(1);
+     Debug.Assert(slot.Kind == OperandKind.Constant);
+     int stackOffset = baseOffset + slot.AsInt32();
+     GenerateStore(context, data, Register(SpRegister), stackOffset);
+ }
+ // Subtracts source 1 from source 0: Sub for integer destinations, FsubScalar otherwise.
+ // Operand types are intentionally not validated here (the ValidateBinOp call is disabled).
+ private static void GenerateSubtract(CodeGenContext context, Operation operation)
+ {
+     Operand minuend = operation.GetSource(0);
+     Operand subtrahend = operation.GetSource(1);
+     Operand difference = operation.Destination;
+     // ValidateBinOp(dest, src1, src2);
+     if (!difference.Type.IsInteger())
+     {
+         context.Assembler.FsubScalar(difference, minuend, subtrahend);
+         return;
+     }
+     context.Assembler.Sub(difference, minuend, subtrahend);
+ }
+ // Writes the function epilogue, then emits Br to the target in source 0.
+ private static void GenerateTailcall(CodeGenContext context, Operation operation)
+ {
+     WriteEpilogue(context);
+     Operand target = operation.GetSource(0);
+     context.Assembler.Br(target);
+ }
+ // Moves an integer source into a vector register with Fmov (topHalf: false), viewing the
+ // destination as FP64 for I64 sources and FP32 otherwise. Emits nothing without a destination.
+ private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     if (output == default)
+     {
+         return;
+     }
+     Debug.Assert(!output.Type.IsInteger() && input.Type.IsInteger());
+     OperandType scalarView = OperandType.FP32;
+     if (input.Type == OperandType.I64)
+     {
+         scalarView = OperandType.FP64;
+     }
+     context.Assembler.Fmov(Register(output, scalarView), input, topHalf: false);
+ }
+ // Extracts element `index` (constant source 1) of the V128 vector in source 0. Integer
+ // destinations use Umov, others DupScalar; the size argument is 3 for 64-bit types, else 2.
+ private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+ {
+     Operand vector = operation.GetSource(0);
+     Operand indexOperand = operation.GetSource(1);
+     Operand element = operation.Destination;
+     Debug.Assert(vector.Type == OperandType.V128);
+     Debug.Assert(indexOperand.Kind == OperandKind.Constant);
+     byte index = indexOperand.AsByte();
+     // The index must address a lane that exists for this element width.
+     Debug.Assert(index < OperandType.V128.GetSizeInBytes() / element.Type.GetSizeInBytes());
+     if (!element.Type.IsInteger())
+     {
+         context.Assembler.DupScalar(element, vector, index, element.Type == OperandType.FP64 ? 3 : 2);
+         return;
+     }
+     context.Assembler.Umov(element, vector, index, element.Type == OperandType.I64 ? 3 : 2);
+ }
+ // Extracts element `index` (constant source 1, below 8) of the V128 vector in source 0
+ // using Umov with size argument 1.
+ private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+ {
+     Operand vector = operation.GetSource(0);
+     Operand indexOperand = operation.GetSource(1);
+     Operand element = operation.Destination;
+     Debug.Assert(vector.Type == OperandType.V128);
+     Debug.Assert(indexOperand.Kind == OperandKind.Constant);
+     byte index = indexOperand.AsByte();
+     Debug.Assert(index < 8);
+     context.Assembler.Umov(element, vector, index, 1);
+ }
+ // Extracts element `index` (constant source 1, below 16) of the V128 vector in source 0
+ // using Umov with size argument 0.
+ private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+ {
+     Operand vector = operation.GetSource(0);
+     Operand indexOperand = operation.GetSource(1);
+     Operand element = operation.Destination;
+     Debug.Assert(vector.Type == OperandType.V128);
+     Debug.Assert(indexOperand.Kind == OperandKind.Constant);
+     byte index = indexOperand.AsByte();
+     Debug.Assert(index < 16);
+     context.Assembler.Umov(element, vector, index, 0);
+ }
+ // Inserts the value in source 1 into element `index` (constant source 2) of the V128 vector
+ // in source 0, which must already live in the destination register. Integer values use the
+ // general-register form of Ins; other values use the element form reading source element 0.
+ // The size argument is 3 for 64-bit values and 2 otherwise.
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+     Operand dest = operation.Destination;
+     Operand src1 = operation.GetSource(0); // Vector
+     Operand src2 = operation.GetSource(1); // Value
+     Operand src3 = operation.GetSource(2); // Index
+     EnsureSameReg(dest, src1);
+     Debug.Assert(src1.Type == OperandType.V128);
+     Debug.Assert(src3.Kind == OperandKind.Constant);
+     byte index = src3.AsByte();
+     // Same lane-count check that GenerateVectorExtract performs: the index must address a lane
+     // that exists for this element width.
+     Debug.Assert(index < OperandType.V128.GetSizeInBytes() / src2.Type.GetSizeInBytes());
+     if (src2.Type.IsInteger())
+     {
+         context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2);
+     }
+     else
+     {
+         context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 3 : 2);
+     }
+ }
+ // Inserts the value in source 1 into 16-bit element `index` (constant source 2) of the V128
+ // vector in source 0, which must already live in the destination register.
+ private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+ {
+     Operand dest = operation.Destination;
+     Operand src1 = operation.GetSource(0); // Vector
+     Operand src2 = operation.GetSource(1); // Value
+     Operand src3 = operation.GetSource(2); // Index
+     EnsureSameReg(dest, src1);
+     Debug.Assert(src1.Type == OperandType.V128);
+     Debug.Assert(src3.Kind == OperandKind.Constant);
+     byte index = src3.AsByte();
+     // Same bound that GenerateVectorExtract16 enforces.
+     Debug.Assert(index < 8);
+     context.Assembler.Ins(dest, src2, index, 1);
+ }
+ // Inserts the value in source 1 into 8-bit element `index` (constant source 2) of the V128
+ // vector in source 0, which must already live in the destination register.
+ private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+ {
+     Operand dest = operation.Destination;
+     Operand src1 = operation.GetSource(0); // Vector
+     Operand src2 = operation.GetSource(1); // Value
+     Operand src3 = operation.GetSource(2); // Index
+     EnsureSameReg(dest, src1);
+     Debug.Assert(src1.Type == OperandType.V128);
+     Debug.Assert(src3.Kind == OperandKind.Constant);
+     byte index = src3.AsByte();
+     // Same bound that GenerateVectorExtract8 enforces.
+     Debug.Assert(index < 16);
+     context.Assembler.Ins(dest, src2, index, 0);
+ }
+ // Emits CmeqVector comparing the (non-integer) destination register with itself, size argument 2.
+ private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+ {
+     Operand vector = operation.Destination;
+     Debug.Assert(!vector.Type.IsInteger());
+     context.Assembler.CmeqVector(vector, vector, vector, 2);
+ }
+ // Emits EorVector of the (non-integer) destination register with itself.
+ private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+ {
+     Operand vector = operation.Destination;
+     Debug.Assert(!vector.Type.IsInteger());
+     context.Assembler.EorVector(vector, vector, vector);
+ }
+ // Emits Fmov between the FP64 views of the V128 source and destination registers.
+ private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type == OperandType.V128 && input.Type == OperandType.V128);
+     Operand outputLow = Register(output, OperandType.FP64);
+     Operand inputLow = Register(input, OperandType.FP64);
+     context.Assembler.Fmov(outputLow, inputLow);
+ }
+ // Emits Fmov between the FP32 views of the V128 source and destination registers.
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type == OperandType.V128 && input.Type == OperandType.V128);
+     Operand outputLow = Register(output, OperandType.FP32);
+     Operand inputLow = Register(input, OperandType.FP32);
+     context.Assembler.Fmov(outputLow, inputLow);
+ }
+ // Emits Uxth from source 0 into the destination; both operands must be integers.
+ private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type.IsInteger() && input.Type.IsInteger());
+     context.Assembler.Uxth(output, input);
+ }
+ // Zero-extends a 32-bit value by moving the source into the I32 view of the destination
+ // register. The move is skipped when the source is already I32 and has the same value
+ // (register) as the destination.
+ private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type.IsInteger() && input.Type.IsInteger());
+     bool moveIsRedundant = output.Value == input.Value && input.Type == OperandType.I32;
+     if (!moveIsRedundant)
+     {
+         Operand output32 = Register(output.GetRegister().Index, OperandType.I32);
+         context.Assembler.Mov(output32, input);
+     }
+ }
+ // Emits Uxtb from source 0 into the destination; both operands must be integers.
+ private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+ {
+     Operand input = operation.GetSource(0);
+     Operand output = operation.Destination;
+     Debug.Assert(output.Type.IsInteger() && input.Type.IsInteger());
+     context.Assembler.Uxtb(output, input);
+ }
+ // Writes the function prologue and returns the unwind information describing it.
+ //
+ // Order of emission:
+ //  1. Used callee-saved integer registers, then vector registers (as FP64), are pushed with
+ //     pre-indexed stores; one unwind push entry is recorded per register.
+ //  2. The local frame (spill region + FP/LR save region, 16-byte aligned) and the outgoing
+ //     call-arguments region are reserved, and FP/LR are saved when the function has calls.
+ //
+ // The FP/LR save is guarded by HasCall on both frame-size paths so that it always matches
+ // WriteEpilogue, which only reloads FP/LR when HasCall is set.
+ private static UnwindInfo WritePrologue(CodeGenContext context)
+ {
+     List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>();
+     Operand rsp = Register(SpRegister);
+     int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+     int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+     int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+     int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+     // Every callee-saved register takes 8 bytes; the whole region is kept 16-byte aligned.
+     int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8);
+     int offset = 0;
+     WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+     WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+     int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+     int outArgsSize = context.CallArgsRegionSize;
+     if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+     {
+         // Small frame: the pre-indexed Stp both reserves the local area and saves FP/LR.
+         // NOTE(review): when HasCall is false nothing on this path subtracts localSize from SP;
+         // confirm the spill region is always empty (or HasCall always true) in that case.
+         if (context.HasCall)
+         {
+             context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize);
+             context.Assembler.MovSp(Register(FpRegister), rsp);
+         }
+         if (outArgsSize != 0)
+         {
+             context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize));
+         }
+     }
+     else
+     {
+         // Large frame: reserve locals and outgoing arguments with a single SP adjustment.
+         int frameSize = localSize + outArgsSize;
+         if (frameSize != 0)
+         {
+             if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+             {
+                 context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize));
+             }
+             else
+             {
+                 Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+                 GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+                 context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+             }
+         }
+         // Only save FP/LR (and repoint FP) when the epilogue is going to restore them;
+         // otherwise FP would be left clobbered on return.
+         if (context.HasCall)
+         {
+             context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+             if (outArgsSize != 0)
+             {
+                 context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize));
+             }
+             else
+             {
+                 context.Assembler.MovSp(Register(FpRegister), Register(SpRegister));
+             }
+         }
+     }
+     return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+ }
+ // Saves every register set in `mask` below SP, lowest register index first, clearing the
+ // bits as it goes. With an odd register count the first register is stored alone; the rest
+ // are stored in pairs. The very first store (offset == 0) is pre-indexed and reserves the
+ // whole callee-save region; later stores use plain offsets. `offset` is advanced by the
+ // bytes written, and one unwind push entry is recorded per register before its store.
+ private static void WritePrologueCalleeSavesPreIndexed(
+     CodeGenContext context,
+     List<UnwindPushEntry> pushEntries,
+     ref int mask,
+     ref int offset,
+     int calleeSaveRegionSize,
+     OperandType type)
+ {
+     bool oddCount = (BitOperations.PopCount((uint)mask) & 1) != 0;
+     if (oddCount)
+     {
+         int single = BitOperations.TrailingZeroCount(mask);
+         pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: single));
+         mask &= ~(1 << single);
+         if (offset == 0)
+         {
+             context.Assembler.StrRiPre(Register(single, type), Register(SpRegister), -calleeSaveRegionSize);
+         }
+         else
+         {
+             context.Assembler.StrRiUn(Register(single, type), Register(SpRegister), offset);
+         }
+         offset += type.GetSizeInBytes();
+     }
+     while (mask != 0)
+     {
+         int first = BitOperations.TrailingZeroCount(mask);
+         pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: first));
+         mask &= ~(1 << first);
+         int second = BitOperations.TrailingZeroCount(mask);
+         pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: second));
+         mask &= ~(1 << second);
+         if (offset == 0)
+         {
+             context.Assembler.StpRiPre(Register(first, type), Register(second, type), Register(SpRegister), -calleeSaveRegionSize);
+         }
+         else
+         {
+             context.Assembler.StpRiUn(Register(first, type), Register(second, type), Register(SpRegister), offset);
+         }
+         offset += type.GetSizeInBytes() * 2;
+     }
+ }
+ // Writes the function epilogue: releases the outgoing-arguments and local regions, reloads
+ // FP/LR when the function has calls, then restores the used callee-saved registers
+ // (vector registers first, then integer registers).
+ private static void WriteEpilogue(CodeGenContext context)
+ {
+     Operand sp = Register(SpRegister);
+     int outArgsSize = context.CallArgsRegionSize;
+     int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+     bool smallFrame = CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale);
+     if (smallFrame)
+     {
+         if (outArgsSize != 0)
+         {
+             context.Assembler.Add(sp, sp, Const(OperandType.I64, outArgsSize));
+         }
+         if (context.HasCall)
+         {
+             // The post-indexed load also releases the local area.
+             context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), sp, localSize);
+         }
+     }
+     else
+     {
+         if (context.HasCall)
+         {
+             context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), sp, outArgsSize);
+         }
+         int frameSize = localSize + outArgsSize;
+         if (frameSize != 0)
+         {
+             if (!CodeGenCommon.ConstFitsOnUImm12(frameSize))
+             {
+                 // Too large for the immediate form: go through the reserved register.
+                 Operand scratch = Register(CodeGenCommon.ReservedRegister);
+                 GenerateConstantCopy(context, scratch, (ulong)frameSize);
+                 context.Assembler.Add(sp, sp, scratch, ArmExtensionType.Uxtx);
+             }
+             else
+             {
+                 context.Assembler.Add(sp, sp, Const(OperandType.I64, frameSize));
+             }
+         }
+     }
+     int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+     int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+     int savedIntCount = BitOperations.PopCount((uint)intMask);
+     int savedVecCount = BitOperations.PopCount((uint)vecMask);
+     // Start at the end of the saved data (8 bytes per register) and walk back down to 0.
+     int offset = savedIntCount * 8 + savedVecCount * 8;
+     int calleeSaveRegionSize = Align16(offset);
+     WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+     WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+ }
+ // Reloads every register set in `mask`, highest register index first, clearing the bits as
+ // it goes. Registers are loaded in pairs while at least two remain; a final lone register is
+ // loaded on its own. `offset` is decreased by the bytes read before each load; the load that
+ // brings it to 0 is post-indexed and releases the whole callee-save region.
+ private static void WriteEpilogueCalleeSavesPostIndexed(
+     CodeGenContext context,
+     ref int mask,
+     ref int offset,
+     int calleeSaveRegionSize,
+     OperandType type)
+ {
+     while (mask != 0)
+     {
+         int upper = BitUtils.HighestBitSet(mask);
+         mask &= ~(1 << upper);
+         if (mask == 0)
+         {
+             // Only one register left: single load.
+             offset -= type.GetSizeInBytes();
+             if (offset == 0)
+             {
+                 context.Assembler.LdrRiPost(Register(upper, type), Register(SpRegister), calleeSaveRegionSize);
+             }
+             else
+             {
+                 context.Assembler.LdrRiUn(Register(upper, type), Register(SpRegister), offset);
+             }
+             continue;
+         }
+         int lower = BitUtils.HighestBitSet(mask);
+         mask &= ~(1 << lower);
+         offset -= type.GetSizeInBytes() * 2;
+         if (offset == 0)
+         {
+             context.Assembler.LdpRiPost(Register(lower, type), Register(upper, type), Register(SpRegister), calleeSaveRegionSize);
+         }
+         else
+         {
+             context.Assembler.LdpRiUn(Register(lower, type), Register(upper, type), Register(SpRegister), offset);
+         }
+     }
+ }
+ // Materializes `value` in `dest`. Zero is copied from the zero register. Otherwise the value
+ // is built from its non-zero 16-bit chunks, lowest first: Movz for the first chunk emitted,
+ // Movk for each following one, with `hw` being the chunk position.
+ private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value)
+ {
+     if (value == 0)
+     {
+         context.Assembler.Mov(dest, Register(ZrRegister, dest.Type));
+         return;
+     }
+     bool movzEmitted = false;
+     for (int hw = 0; value != 0; hw++, value >>= 16)
+     {
+         int chunk = (ushort)value;
+         if (chunk == 0)
+         {
+             // All-zero chunks need no instruction.
+             continue;
+         }
+         if (movzEmitted)
+         {
+             context.Assembler.Movk(dest, chunk, hw);
+         }
+         else
+         {
+             context.Assembler.Movz(dest, chunk, hw);
+             movzEmitted = true;
+         }
+     }
+ }
+ // Emits a compare-and-swap retry loop for the given access size:
+ //   Ldaxr* actual <- [address]; Cmp actual, expected; near jump on Ne;
+ //   Stlxr* desired -> [address] (status in `result`); Cbnz back to the load if the store
+ //   failed; JumpHere; Clrex.
+ // Byte and Hword select the b/h instruction variants; any other size uses Ldaxr/Stlxr.
+ // NOTE(review): JumpHere presumably binds the pending near jump so a mismatch skips the
+ // store - confirm against CodeGenContext.
+ private static void GenerateAtomicCas(
+     CodeGenContext context,
+     Operand address,
+     Operand expected,
+     Operand desired,
+     Operand actual,
+     Operand result,
+     AccessSize accessSize)
+ {
+     int loopStart = context.StreamOffset;
+     if (accessSize == AccessSize.Byte)
+     {
+         context.Assembler.Ldaxrb(actual, address);
+     }
+     else if (accessSize == AccessSize.Hword)
+     {
+         context.Assembler.Ldaxrh(actual, address);
+     }
+     else
+     {
+         context.Assembler.Ldaxr(actual, address);
+     }
+     context.Assembler.Cmp(actual, expected);
+     context.JumpToNear(ArmCondition.Ne);
+     if (accessSize == AccessSize.Byte)
+     {
+         context.Assembler.Stlxrb(desired, address, result);
+     }
+     else if (accessSize == AccessSize.Hword)
+     {
+         context.Assembler.Stlxrh(desired, address, result);
+     }
+     else
+     {
+         context.Assembler.Stlxr(desired, address, result);
+     }
+     // Retry if store failed: branch back by the distance from here to the initial load.
+     int backToLoad = loopStart - context.StreamOffset;
+     context.Assembler.Cbnz(result, backToLoad);
+     context.JumpHere();
+     context.Assembler.Clrex();
+ }
+ // Emits a two-register compare-and-swap retry loop:
+ //   Ldaxp actualLow/actualHigh <- [address]; the Eor/Eor/Orr sequence leaves a non-zero
+ //   value in temp0 when either half differs from the expected pair, and a near jump is taken
+ //   in that case; otherwise Stlxp writes the desired pair (status in the I32 view of temp0),
+ //   Cbnz goes back to the load if the store failed; then JumpHere and Clrex.
+ private static void GenerateAtomicDcas(
+     CodeGenContext context,
+     Operand address,
+     Operand expectedLow,
+     Operand expectedHigh,
+     Operand desiredLow,
+     Operand desiredHigh,
+     Operand actualLow,
+     Operand actualHigh,
+     Operand temp0,
+     Operand temp1)
+ {
+     int loopStart = context.StreamOffset;
+     context.Assembler.Ldaxp(actualLow, actualHigh, address);
+     // temp0 = (actualHigh ^ expectedHigh) | (actualLow ^ expectedLow)
+     context.Assembler.Eor(temp0, actualHigh, expectedHigh);
+     context.Assembler.Eor(temp1, actualLow, expectedLow);
+     context.Assembler.Orr(temp0, temp1, temp0);
+     context.JumpToNearIfNotZero(temp0);
+     // temp0 is free again at this point and is reused for the store status.
+     Operand status = Register(temp0, OperandType.I32);
+     context.Assembler.Stlxp(desiredLow, desiredHigh, address, status);
+     // Retry if store failed.
+     int backToLoad = loopStart - context.StreamOffset;
+     context.Assembler.Cbnz(status, backToLoad);
+     context.JumpHere();
+     context.Assembler.Clrex();
+ }
+ // Tries to replace two consecutive memory operations with a single Ldp/Stp.
+ //
+ // Pairing requires that both operations:
+ //  - are the same instruction (both Load or both Store),
+ //  - use the plain [base + displacement] form with the same base operand,
+ //  - access values of the same type at adjacent addresses (second right after the first),
+ //  - have a first displacement that passes ConstFitsOnSImm7 for the value size.
+ // Loads are additionally rejected when both write the same register, or when the first load
+ // overwrites the base register (the second load would then read through the new value, which
+ // a single Ldp cannot express).
+ //
+ // Returns true if the paired instruction was emitted; false if nothing was emitted and the
+ // caller must generate the operations individually.
+ private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp)
+ {
+     // A load must never be paired with a store (or vice versa).
+     if (currentOp.Instruction != nextOp.Instruction)
+     {
+         return false;
+     }
+     if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset))
+     {
+         return false;
+     }
+     if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset))
+     {
+         return false;
+     }
+     if (op1Base != op2Base)
+     {
+         return false;
+     }
+     OperandType valueType = GetMemOpValueType(currentOp);
+     if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset)
+     {
+         return false;
+     }
+     if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2()))
+     {
+         return false;
+     }
+     if (currentOp.Instruction == Instruction.Load)
+     {
+         Operand firstDest = currentOp.Destination;
+         Operand secondDest = nextOp.Destination;
+         if (IsSameRegister(firstDest, secondDest) || IsSameRegister(firstDest, op1Base))
+         {
+             return false;
+         }
+         context.Assembler.LdpRiUn(firstDest, secondDest, op1Base, op1Offset);
+     }
+     else if (currentOp.Instruction == Instruction.Store)
+     {
+         context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset);
+     }
+     else
+     {
+         return false;
+     }
+     return true;
+     // True when both operands are registers with the same index in the same register file
+     // (integer vs. non-integer), regardless of the operand width.
+     static bool IsSameRegister(Operand a, Operand b)
+     {
+         return a.Kind == OperandKind.Register &&
+                b.Kind == OperandKind.Register &&
+                a.Type.IsInteger() == b.Type.IsInteger() &&
+                a.GetRegister().Index == b.GetRegister().Index;
+     }
+ }
+ // True when the operation's instruction is Instruction.Load or Instruction.Store.
+ private static bool IsLoadOrStore(Operation operation)
+ {
+     Instruction instruction = operation.Instruction;
+     return instruction == Instruction.Load || instruction == Instruction.Store;
+ }
+ // Type of the value moved by a memory operation: the destination's type when there is a
+ // destination, otherwise the type of source 1.
+ private static OperandType GetMemOpValueType(Operation operation)
+ {
+     bool hasDestination = operation.Destination != default;
+     return hasDestination ? operation.Destination.Type : operation.GetSource(1).Type;
+ }
+ // Extracts the base operand and displacement of a memory operation whose address (source 0)
+ // is a memory operand of the plain [base + displacement] form.
+ //
+ // Returns false, with `baseAddress` = default and `offset` = 0, when the address is not a
+ // memory operand, has no base address, or has an index.
+ //
+ // The previous "use the index as base when there is no base" fallback could never succeed,
+ // since any operand with an index was rejected right after it; it has been removed without
+ // changing the result for any input.
+ private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset)
+ {
+     baseAddress = default;
+     offset = 0;
+     Operand address = operation.GetSource(0);
+     if (address.Kind != OperandKind.Memory)
+     {
+         return false;
+     }
+     MemoryOperand memOp = address.GetMemory();
+     if (memOp.BaseAddress == default || memOp.Index != default)
+     {
+         return false;
+     }
+     baseAddress = memOp.BaseAddress;
+     offset = memOp.Displacement;
+     return true;
+ }
+ // Returns a register operand with the same register index as `operand`, viewed as `type`.
+ private static Operand Register(Operand operand, OperandType type = OperandType.I64)
+ {
+     int index = operand.GetRegister().Index;
+     return Register(index, type);
+ }
+ // Creates a register operand for the given index and operand type through Factory.Register,
+ // always passing RegisterType.Integer.
+ private static Operand Register(int register, OperandType type = OperandType.I64)
+     => Factory.Register(register, RegisterType.Integer, type);
+ // Rounds `value` up to the next multiple of 16 (values already aligned are unchanged).
+ private static int Align16(int value)
+ {
+     const int Mask = 0xf;
+     int bumped = value + Mask;
+     return bumped & ~Mask;
+ }
+ // Debug-only check for unary operations: destination and source are not required to be the
+ // same register, only their types must match.
+ [Conditional("DEBUG")]
+ private static void ValidateUnOp(Operand dest, Operand source)
+     => EnsureSameType(dest, source);
+ // Debug-only check for binary operations: destination and first source are not required to
+ // be the same register, only the three operand types must match.
+ [Conditional("DEBUG")]
+ private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+     => EnsureSameType(dest, src1, src2);
+ // Debug-only check for shifts: the destination and the shifted value share one integer type
+ // (they need not be the same register), and the shift amount is I32.
+ [Conditional("DEBUG")]
+ private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+ {
+     EnsureSameType(dest, src1);
+     bool destIsInteger = dest.Type.IsInteger();
+     Debug.Assert(destIsInteger && src2.Type == OperandType.I32);
+ }
+ // Asserts that both operands are the same register or memory operand: op1 is a Register or
+ // Memory operand, and op2 has the same kind and value.
+ private static void EnsureSameReg(Operand op1, Operand op2)
+ {
+     OperandKind kind = op1.Kind;
+     Debug.Assert(kind == OperandKind.Register || kind == OperandKind.Memory);
+     Debug.Assert(kind == op2.Kind);
+     Debug.Assert(op1.Value == op2.Value);
+ }
+ // Asserts that both operands have the same type.
+ private static void EnsureSameType(Operand op1, Operand op2)
+     => Debug.Assert(op1.Type == op2.Type);
+ // Asserts that op2 and op3 both have the same type as op1.
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+ {
+     OperandType expected = op1.Type;
+     Debug.Assert(expected == op2.Type);
+     Debug.Assert(expected == op3.Type);
+ }
+ // Asserts that op2, op3 and op4 all have the same type as op1.
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+ {
+     OperandType expected = op1.Type;
+     Debug.Assert(expected == op2.Type);
+     Debug.Assert(expected == op3.Type);
+     Debug.Assert(expected == op4.Type);
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs b/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
new file mode 100644
index 00000000..aaa00bb6
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
@@ -0,0 +1,662 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+namespace ARMeilleure.CodeGen.Arm64
+ static class CodeGeneratorIntrinsic
+ {
+ /// <summary>
+ /// Emits the machine code for one intrinsic <paramref name="operation"/>.
+ /// </summary>
+ /// <remarks>
+ /// The vector type and vector size fields are masked out of the intrinsic before the table lookup
+ /// and are handed separately to the encoding helpers, along with the instruction word found in the table.
+ /// Which sources feed the instruction (and in which order) depends on the <see cref="IntrinsicType"/>.
+ /// </remarks>
+ /// <param name="context">Code generation context whose assembler receives the instruction</param>
+ /// <param name="operation">Intrinsic operation to encode</param>
+ /// <exception cref="NotImplementedException">The intrinsic type is not handled by this method</exception>
+ public static void GenerateOperation(CodeGenContext context, Operation operation)
+ {
+     Intrinsic intrin = operation.Intrinsic;
+     IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+     // Modifier fields carried by the intrinsic itself. Scalar forms pass a literal 0 instead of q.
+     uint q = (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift;
+     uint sz = (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift;
+     uint inst = info.Inst;
+     // Reads the source at the given position as an unsigned immediate.
+     uint Immediate(int index) => (uint)operation.GetSource(index).AsInt32();
+     switch (info.Type)
+     {
+         case IntrinsicType.ScalarUnary:
+             GenerateVectorUnary(context, 0, sz, inst, operation.Destination, operation.GetSource(0));
+             break;
+         case IntrinsicType.ScalarUnaryByElem:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             GenerateVectorUnaryByElem(context, 0, sz, inst, Immediate(1), operation.Destination, operation.GetSource(0));
+             break;
+         case IntrinsicType.ScalarBinary:
+             GenerateVectorBinary(context, 0, sz, inst, operation.Destination, operation.GetSource(0), operation.GetSource(1));
+             break;
+         case IntrinsicType.ScalarBinaryFPByElem:
+             Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+             GenerateVectorBinaryFPByElem(
+                 context, 0, sz, inst, Immediate(2), operation.Destination, operation.GetSource(0), operation.GetSource(1));
+             break;
+         case IntrinsicType.ScalarBinaryRd:
+             // Source 0 is not passed to the encoder for the "Rd" forms; only source 1 is encoded.
+             GenerateVectorUnary(context, 0, sz, inst, operation.Destination, operation.GetSource(1));
+             break;
+         case IntrinsicType.ScalarBinaryShl:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             GenerateVectorBinaryShlImm(context, 0, sz, inst, operation.Destination, operation.GetSource(0), Immediate(1));
+             break;
+         case IntrinsicType.ScalarBinaryShr:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             GenerateVectorBinaryShrImm(context, 0, sz, inst, operation.Destination, operation.GetSource(0), Immediate(1));
+             break;
+         case IntrinsicType.ScalarFPCompare:
+             GenerateScalarFPCompare(context, sz, inst, operation.Destination, operation.GetSource(0), operation.GetSource(1));
+             break;
+         case IntrinsicType.ScalarFPConvFixed:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             // The size field is biased by 2 before being used by the shift-immediate encoder.
+             GenerateVectorBinaryShrImm(context, 0, sz + 2u, inst, operation.Destination, operation.GetSource(0), Immediate(1));
+             break;
+         case IntrinsicType.ScalarFPConvFixedGpr:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             GenerateScalarFPConvGpr(context, sz, inst, operation.Destination, operation.GetSource(0), Immediate(1));
+             break;
+         case IntrinsicType.ScalarFPConvGpr:
+             GenerateScalarFPConvGpr(context, sz, inst, operation.Destination, operation.GetSource(0));
+             break;
+         case IntrinsicType.ScalarTernary:
+             // Sources 1 and 2 become rn and rm, source 0 becomes ra.
+             GenerateScalarTernary(
+                 context, sz, inst, operation.Destination, operation.GetSource(1), operation.GetSource(2), operation.GetSource(0));
+             break;
+         case IntrinsicType.ScalarTernaryFPRdByElem:
+             Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+             GenerateVectorBinaryFPByElem(
+                 context, 0, sz, inst, Immediate(3), operation.Destination, operation.GetSource(1), operation.GetSource(2));
+             break;
+         case IntrinsicType.ScalarTernaryShlRd:
+             Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+             GenerateVectorBinaryShlImm(context, 0, sz, inst, operation.Destination, operation.GetSource(1), Immediate(2));
+             break;
+         case IntrinsicType.ScalarTernaryShrRd:
+             Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+             GenerateVectorBinaryShrImm(context, 0, sz, inst, operation.Destination, operation.GetSource(1), Immediate(2));
+             break;
+         case IntrinsicType.VectorUnary:
+             GenerateVectorUnary(context, q, sz, inst, operation.Destination, operation.GetSource(0));
+             break;
+         case IntrinsicType.VectorUnaryByElem:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             GenerateVectorUnaryByElem(context, q, sz, inst, Immediate(1), operation.Destination, operation.GetSource(0));
+             break;
+         case IntrinsicType.VectorBinary:
+             GenerateVectorBinary(context, q, sz, inst, operation.Destination, operation.GetSource(0), operation.GetSource(1));
+             break;
+         case IntrinsicType.VectorBinaryBitwise:
+             // Bitwise forms use the overload without a size field.
+             GenerateVectorBinary(context, q, inst, operation.Destination, operation.GetSource(0), operation.GetSource(1));
+             break;
+         case IntrinsicType.VectorBinaryByElem:
+             Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+             GenerateVectorBinaryByElem(
+                 context, q, sz, inst, Immediate(2), operation.Destination, operation.GetSource(0), operation.GetSource(1));
+             break;
+         case IntrinsicType.VectorBinaryFPByElem:
+             Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+             GenerateVectorBinaryFPByElem(
+                 context, q, sz, inst, Immediate(2), operation.Destination, operation.GetSource(0), operation.GetSource(1));
+             break;
+         case IntrinsicType.VectorBinaryRd:
+             // Source 0 is not passed to the encoder for the "Rd" forms; only source 1 is encoded.
+             GenerateVectorUnary(context, q, sz, inst, operation.Destination, operation.GetSource(1));
+             break;
+         case IntrinsicType.VectorBinaryShl:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             GenerateVectorBinaryShlImm(context, q, sz, inst, operation.Destination, operation.GetSource(0), Immediate(1));
+             break;
+         case IntrinsicType.VectorBinaryShr:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             GenerateVectorBinaryShrImm(context, q, sz, inst, operation.Destination, operation.GetSource(0), Immediate(1));
+             break;
+         case IntrinsicType.VectorFPConvFixed:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             // The size field is biased by 2 before being used by the shift-immediate encoder.
+             GenerateVectorBinaryShrImm(context, q, sz + 2u, inst, operation.Destination, operation.GetSource(0), Immediate(1));
+             break;
+         case IntrinsicType.VectorInsertByElem:
+             Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+             Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+             // Source 3 is the source element index, source 1 the destination element index.
+             GenerateVectorInsertByElem(context, sz, inst, Immediate(3), Immediate(1), operation.Destination, operation.GetSource(2));
+             break;
+         case IntrinsicType.VectorLookupTable:
+             // All sources but the last are table registers (1 to 4 of them); the last one is passed as rm.
+             Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);
+             // Table registers must be vector registers with consecutive indices.
+             for (int i = 1; i < operation.SourcesCount - 1; i++)
+             {
+                 Register currReg = operation.GetSource(i).GetRegister();
+                 Register prevReg = operation.GetSource(i - 1).GetRegister();
+                 Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector);
+             }
+             // (table register count - 1) is merged into the instruction word starting at bit 13.
+             GenerateVectorBinary(
+                 context,
+                 q,
+                 inst | ((uint)(operation.SourcesCount - 2) << 13),
+                 operation.Destination,
+                 operation.GetSource(0),
+                 operation.GetSource(operation.SourcesCount - 1));
+             break;
+         case IntrinsicType.VectorTernaryFPRdByElem:
+             Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+             GenerateVectorBinaryFPByElem(
+                 context, q, sz, inst, Immediate(3), operation.Destination, operation.GetSource(1), operation.GetSource(2));
+             break;
+         case IntrinsicType.VectorTernaryRd:
+             GenerateVectorBinary(context, q, sz, inst, operation.Destination, operation.GetSource(1), operation.GetSource(2));
+             break;
+         case IntrinsicType.VectorTernaryRdBitwise:
+             // Bitwise forms use the overload without a size field.
+             GenerateVectorBinary(context, q, inst, operation.Destination, operation.GetSource(1), operation.GetSource(2));
+             break;
+         case IntrinsicType.VectorTernaryRdByElem:
+             Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+             GenerateVectorBinaryByElem(
+                 context, q, sz, inst, Immediate(3), operation.Destination, operation.GetSource(1), operation.GetSource(2));
+             break;
+         case IntrinsicType.VectorTernaryShlRd:
+             Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+             GenerateVectorBinaryShlImm(context, q, sz, inst, operation.Destination, operation.GetSource(1), Immediate(2));
+             break;
+         case IntrinsicType.VectorTernaryShrRd:
+             Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+             GenerateVectorBinaryShrImm(context, q, sz, inst, operation.Destination, operation.GetSource(1), Immediate(2));
+             break;
+         case IntrinsicType.GetRegister:
+             context.Assembler.WriteInstruction(inst, operation.Destination);
+             break;
+         case IntrinsicType.SetRegister:
+             context.Assembler.WriteInstruction(inst, operation.GetSource(0));
+             break;
+         default:
+             throw new NotImplementedException(info.Type.ToString());
+     }
+ }
+ /// <summary>
+ /// Emits a scalar floating-point compare and then copies the outcome into <paramref name="dest"/>
+ /// through an MRS instruction.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="sz">Size field, placed at bit 22 of the instruction</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="dest">Operand receiving the value read by the MRS</param>
+ /// <param name="rn">First compare operand</param>
+ /// <param name="rm">Second compare operand; a constant zero selects the alternate encoding</param>
+ private static void GenerateScalarFPCompare(
+     CodeGenContext context,
+     uint sz,
+     uint instruction,
+     Operand dest,
+     Operand rn,
+     Operand rm)
+ {
+     uint encoding = instruction | (sz << 22);
+     bool isConstantZero = rm.Kind == OperandKind.Constant && rm.Value == 0;
+     if (isConstantZero)
+     {
+         // Bit 3 is set for a constant-zero right-hand side (presumably the compare-with-#0.0 form),
+         // and rn stands in for rm since a constant cannot be encoded as a register.
+         encoding |= 0b1000;
+         rm = rn;
+     }
+     context.Assembler.WriteInstructionRm16NoRet(encoding, rn, rm);
+     // NOTE(review): the (1, 3, 4, 2, 0) system register tuple looks like NZCV; confirm against Assembler.Mrs.
+     context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
+ }
+ /// <summary>
+ /// Emits a scalar conversion between a floating-point register and a general purpose register.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="sz">Size field, placed at bit 22 of the instruction</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">Source operand</param>
+ private static void GenerateScalarFPConvGpr(
+     CodeGenContext context,
+     uint sz,
+     uint instruction,
+     Operand rd,
+     Operand rn)
+ {
+     instruction |= sz << 22;
+     if (rd.Type.IsInteger())
+     {
+         // Integer destination: the width-dependent part of the encoding is left to WriteInstructionAuto.
+         context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+         return;
+     }
+     // Non-integer destination: the sf flag is set here when the source is 64-bit.
+     if (rn.Type == OperandType.I64)
+     {
+         instruction |= Assembler.SfFlag;
+     }
+     context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ /// <summary>
+ /// Emits a scalar fixed-point conversion between a floating-point register and a general purpose register.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="sz">Size field, placed at bit 22 of the instruction</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">Source operand</param>
+ /// <param name="fBits">Number of fractional bits, from 1 to 64 (at most 32 when the integer side is 32-bit)</param>
+ private static void GenerateScalarFPConvGpr(
+     CodeGenContext context,
+     uint sz,
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     uint fBits)
+ {
+     // The value encoded at bit 10 is (64 - fBits). With fBits == 0 that would be 64, which sets
+     // bit 16 and therefore corrupts the field next to it, so zero is rejected as well.
+     Debug.Assert(fBits > 0 && fBits <= 64);
+     instruction |= (sz << 22);
+     instruction |= (64 - fBits) << 10;
+     if (rd.Type.IsInteger())
+     {
+         // A 32-bit integer destination cannot hold more than 32 fractional bits.
+         Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);
+         context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+     }
+     else
+     {
+         if (rn.Type == OperandType.I64)
+         {
+             instruction |= Assembler.SfFlag;
+         }
+         else
+         {
+             // Source is not 64-bit, so the fractional bit count is limited to 32.
+             Debug.Assert(fBits <= 32);
+         }
+         context.Assembler.WriteInstruction(instruction, rd, rn);
+     }
+ }
+ /// <summary>
+ /// Emits a scalar instruction that takes three source registers.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="sz">Size field, placed at bit 22 of the instruction</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">First source operand</param>
+ /// <param name="rm">Second source operand</param>
+ /// <param name="ra">Third source operand</param>
+ private static void GenerateScalarTernary(
+     CodeGenContext context,
+     uint sz,
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     Operand rm,
+     Operand ra)
+ {
+     uint encoding = instruction | (sz << 22);
+     context.Assembler.WriteInstruction(encoding, rd, rn, rm, ra);
+ }
+ /// <summary>
+ /// Emits an instruction with one source register, merging the q and size fields into the opcode.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="q">Value placed at bit 30 of the instruction</param>
+ /// <param name="sz">Size field, placed at bit 22 of the instruction</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">Source operand</param>
+ private static void GenerateVectorUnary(
+     CodeGenContext context,
+     uint q,
+     uint sz,
+     uint instruction,
+     Operand rd,
+     Operand rn)
+ {
+     uint modifiers = (q << 30) | (sz << 22);
+     context.Assembler.WriteInstruction(instruction | modifiers, rd, rn);
+ }
+ /// <summary>
+ /// Emits an instruction with one source register whose element is selected by an index.
+ /// </summary>
+ /// <remarks>
+ /// The size is not written at bit 22 here; it is only expressed through the 5-bit immediate at bit 16,
+ /// as a single marker bit at position <paramref name="sz"/> with the index placed right above it.
+ /// </remarks>
+ /// <param name="context">Code generation context</param>
+ /// <param name="q">Value placed at bit 30 of the instruction</param>
+ /// <param name="sz">Element size selector</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="srcIndex">Index of the source element</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">Source operand</param>
+ private static void GenerateVectorUnaryByElem(
+     CodeGenContext context,
+     uint q,
+     uint sz,
+     uint instruction,
+     uint srcIndex,
+     Operand rd,
+     Operand rn)
+ {
+     int sizeShift = (int)sz;
+     uint sizeMarker = 1u << sizeShift;
+     uint imm5 = (srcIndex << (sizeShift + 1)) | sizeMarker;
+     context.Assembler.WriteInstruction(instruction | (q << 30) | (imm5 << 16), rd, rn);
+ }
+ /// <summary>
+ /// Emits an instruction with two source registers and no size field.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="q">Value placed at bit 30 of the instruction</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">First source operand</param>
+ /// <param name="rm">Second source operand</param>
+ private static void GenerateVectorBinary(
+     CodeGenContext context,
+     uint q,
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     Operand rm)
+ {
+     uint encoding = instruction | (q << 30);
+     context.Assembler.WriteInstructionRm16(encoding, rd, rn, rm);
+ }
+ /// <summary>
+ /// Emits an instruction with two source registers, merging the q and size fields into the opcode.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="q">Value placed at bit 30 of the instruction</param>
+ /// <param name="sz">Size field, placed at bit 22 of the instruction</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">First source operand</param>
+ /// <param name="rm">Second source operand</param>
+ private static void GenerateVectorBinary(
+     CodeGenContext context,
+     uint q,
+     uint sz,
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     Operand rm)
+ {
+     uint modifiers = (q << 30) | (sz << 22);
+     context.Assembler.WriteInstructionRm16(instruction | modifiers, rd, rn, rm);
+ }
+ /// <summary>
+ /// Emits an instruction with two source registers where an element of the second source is selected by index.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="q">Value placed at bit 30 of the instruction</param>
+ /// <param name="size">Size field, placed at bit 22 of the instruction; also selects the index layout</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="srcIndex">Index of the element in <paramref name="rm"/></param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">First source operand</param>
+ /// <param name="rm">Second source operand (the indexed one)</param>
+ private static void GenerateVectorBinaryByElem(
+     CodeGenContext context,
+     uint q,
+     uint size,
+     uint instruction,
+     uint srcIndex,
+     Operand rd,
+     Operand rn,
+     Operand rm)
+ {
+     // size == 2: two index bits -> bit 0 lands on bit 21, bit 1 on bit 11.
+     // otherwise: three index bits -> bits 0-1 land on bits 20-21, bit 2 on bit 11.
+     uint indexBits = size == 2
+         ? ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10)
+         : ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);
+     uint encoding = instruction | (q << 30) | (size << 22) | indexBits;
+     context.Assembler.WriteInstructionRm16(encoding, rd, rn, rm);
+ }
+ /// <summary>
+ /// Emits a floating-point instruction with two source registers where an element of the second
+ /// source is selected by index.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="q">Value placed at bit 30 of the instruction</param>
+ /// <param name="sz">Size field, placed at bit 22 of the instruction; also selects the index layout</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="srcIndex">Index of the element in <paramref name="rm"/></param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">First source operand</param>
+ /// <param name="rm">Second source operand (the indexed one)</param>
+ private static void GenerateVectorBinaryFPByElem(
+     CodeGenContext context,
+     uint q,
+     uint sz,
+     uint instruction,
+     uint srcIndex,
+     Operand rd,
+     Operand rn,
+     Operand rm)
+ {
+     // sz == 0: two index bits -> bit 0 lands on bit 21, bit 1 on bit 11.
+     // sz != 0: a single index bit, placed on bit 11.
+     uint indexBits = sz == 0
+         ? ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10)
+         : (srcIndex & 1) << 11;
+     uint encoding = instruction | (q << 30) | (sz << 22) | indexBits;
+     context.Assembler.WriteInstructionRm16(encoding, rd, rn, rm);
+ }
+ /// <summary>
+ /// Emits a shift-left-by-immediate instruction.
+ /// </summary>
+ /// <remarks>
+ /// The immediate written at bit 16 is <c>(8 &lt;&lt; sz)</c> OR'ed with the shift amount, the latter
+ /// being masked to the bits below <c>(8 &lt;&lt; sz)</c>.
+ /// </remarks>
+ /// <param name="context">Code generation context</param>
+ /// <param name="q">Value placed at bit 30 of the instruction</param>
+ /// <param name="sz">Size selector; the shift must be lower than <c>8 &lt;&lt; sz</c></param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">Source operand</param>
+ /// <param name="shift">Shift amount, from 0 to <c>(8 &lt;&lt; sz) - 1</c></param>
+ private static void GenerateVectorBinaryShlImm(
+     CodeGenContext context,
+     uint q,
+     uint sz,
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     uint shift)
+ {
+     instruction |= (q << 30);
+     uint sizeMarker = 8u << (int)sz;
+     // shift is unsigned, so only the upper bound needs checking.
+     Debug.Assert(shift < sizeMarker);
+     uint imm = sizeMarker | (shift & (0x3fu >> (int)(3 - sz)));
+     instruction |= (imm << 16);
+     context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ /// <summary>
+ /// Emits a shift-right-by-immediate style instruction.
+ /// </summary>
+ /// <remarks>
+ /// The immediate written at bit 16 is <c>(8 &lt;&lt; sz)</c> OR'ed with <c>(8 &lt;&lt; sz) - shift</c>,
+ /// so larger shift amounts produce smaller encoded values.
+ /// </remarks>
+ /// <param name="context">Code generation context</param>
+ /// <param name="q">Value placed at bit 30 of the instruction</param>
+ /// <param name="sz">Size selector; the shift may not exceed <c>8 &lt;&lt; sz</c></param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">Source operand</param>
+ /// <param name="shift">Shift amount, from 1 to <c>8 &lt;&lt; sz</c> inclusive</param>
+ private static void GenerateVectorBinaryShrImm(
+     CodeGenContext context,
+     uint q,
+     uint sz,
+     uint instruction,
+     Operand rd,
+     Operand rn,
+     uint shift)
+ {
+     uint sizeMarker = 8u << (int)sz;
+     Debug.Assert(shift > 0 && shift <= sizeMarker);
+     uint immField = sizeMarker | (sizeMarker - shift);
+     uint encoding = instruction | (q << 30) | (immField << 16);
+     context.Assembler.WriteInstruction(encoding, rd, rn);
+ }
+ /// <summary>
+ /// Emits an instruction that copies one element of <paramref name="rn"/> into one element of <paramref name="rd"/>.
+ /// </summary>
+ /// <remarks>
+ /// The source index is scaled by the element size and written at bit 11; the destination index is
+ /// written at bit 16, above a single marker bit at position <paramref name="sz"/>.
+ /// </remarks>
+ /// <param name="context">Code generation context</param>
+ /// <param name="sz">Element size selector</param>
+ /// <param name="instruction">Base instruction word</param>
+ /// <param name="srcIndex">Index of the element read from <paramref name="rn"/></param>
+ /// <param name="dstIndex">Index of the element written in <paramref name="rd"/></param>
+ /// <param name="rd">Destination operand</param>
+ /// <param name="rn">Source operand</param>
+ private static void GenerateVectorInsertByElem(
+     CodeGenContext context,
+     uint sz,
+     uint instruction,
+     uint srcIndex,
+     uint dstIndex,
+     Operand rd,
+     Operand rn)
+ {
+     int sizeShift = (int)sz;
+     uint srcField = srcIndex << sizeShift;
+     uint dstField = (dstIndex << (sizeShift + 1)) | (1u << sizeShift);
+     uint encoding = instruction | (srcField << 11) | (dstField << 16);
+     context.Assembler.WriteInstruction(encoding, rd, rn);
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
new file mode 100644
index 00000000..8695db90
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+ /// <summary>
+ /// Immutable pair describing an intrinsic: its instruction word and the category that tells the
+ /// code generator how to encode it.
+ /// </summary>
+ struct IntrinsicInfo
+ {
+     /// <summary>
+     /// Instruction word, exactly as given to the constructor.
+     /// </summary>
+     public uint Inst { get; }
+     /// <summary>
+     /// Category of the intrinsic.
+     /// </summary>
+     public IntrinsicType Type { get; }
+     /// <summary>
+     /// Creates a new intrinsic description.
+     /// </summary>
+     /// <param name="inst">Instruction word</param>
+     /// <param name="type">Category of the intrinsic</param>
+     public IntrinsicInfo(uint inst, IntrinsicType type)
+     {
+         (Inst, Type) = (inst, type);
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
new file mode 100644
index 00000000..53ef152e
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
@@ -0,0 +1,461 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+namespace ARMeilleure.CodeGen.Arm64
+ static class IntrinsicTable
+ {
+ private static IntrinsicInfo[] _intrinTable;
+ static IntrinsicTable()
+ {
+ _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+ Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+ Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem));
+ Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt));
+ Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond));
+ Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond));
+ Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare));
+ Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare));
+ Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel));
+ Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtpsGp, new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv));
+ Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmaddS, new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlaVe, new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi));
+ Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi));
+ Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem));
+ Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem));
+ Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
+ Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
+ Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
+ Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni));
+ Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise));
+ Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+ Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise));
+ Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd));
+ Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd));
+ Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable));
+ Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable));
+ Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
+ }
+        /// <summary>
+        /// Stores <paramref name="info"/> in the table slot selected by the numeric value of <paramref name="intrin"/>.
+        /// </summary>
+        private static void Add(Intrinsic intrin, IntrinsicInfo info) => _intrinTable[(int)intrin] = info;
+        /// <summary>
+        /// Reads the table slot selected by the numeric value of <paramref name="intrin"/>.
+        /// </summary>
+        public static IntrinsicInfo GetInfo(Intrinsic intrin) => _intrinTable[(int)intrin];
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
new file mode 100644
index 00000000..800eca93
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
@@ -0,0 +1,59 @@
+namespace ARMeilleure.CodeGen.Arm64
+ /// <summary>
+ /// Classification attached to every intrinsic registered in the Arm64 intrinsic table.
+ /// Each table entry pairs a 32-bit value with one of these members.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): presumably the member selects the operand layout used when the instruction is encoded;
+ /// confirm against the code generator.
+ /// </remarks>
+ enum IntrinsicType
+ {
+ // Scalar forms.
+ ScalarUnary,
+ ScalarUnaryByElem,
+ ScalarBinary,
+ ScalarBinaryByElem,
+ ScalarBinaryFPByElem,
+ ScalarBinaryRd,
+ ScalarBinaryShl,
+ ScalarBinaryShr,
+ ScalarFcsel,
+ ScalarFmovi,
+ ScalarFPCompare,
+ ScalarFPCompareCond,
+ ScalarFPConv,
+ ScalarFPConvFixed,
+ ScalarFPConvFixedGpr,
+ ScalarFPConvGpr,
+ ScalarTernary,
+ ScalarTernaryFPRdByElem,
+ ScalarTernaryShlRd,
+ ScalarTernaryShrRd,
+ // Vector forms.
+ VectorUnary,
+ VectorUnaryBitwise,
+ VectorUnaryByElem,
+ VectorBinary,
+ VectorBinaryBitwise,
+ VectorBinaryBitwiseImm,
+ VectorBinaryByElem,
+ VectorBinaryFPByElem,
+ VectorBinaryRd,
+ VectorBinaryShl,
+ VectorBinaryShr,
+ VectorExt,
+ VectorFmovi,
+ VectorFPConvFixed,
+ VectorInsertByElem,
+ VectorLdSt,
+ VectorLdStSs,
+ VectorLookupTable,
+ VectorMovi,
+ VectorMvni,
+ VectorTernaryFPRdByElem,
+ VectorTernaryRd,
+ VectorTernaryRdBitwise,
+ VectorTernaryRdByElem,
+ VectorTernaryShlRd,
+ VectorTernaryShrRd,
+ Vector128Unary,
+ Vector128Binary,
+ // Register access forms.
+ GetRegister,
+ SetRegister
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
new file mode 100644
index 00000000..a7f07394
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
@@ -0,0 +1,940 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+namespace ARMeilleure.CodeGen.Arm64
+ class PreAllocator
+ {
+        /// <summary>
+        /// Cache that maps a (constant value, operand type) pair to the local operand registered for it.
+        /// </summary>
+        private class ConstantDict
+        {
+            private readonly Dictionary<(ulong, OperandType), Operand> _constants =
+                new Dictionary<(ulong, OperandType), Operand>();
+            /// <summary>Registers <paramref name="local"/> for the given value and type; the pair must not be present yet.</summary>
+            public void Add(ulong value, OperandType type, Operand local) => _constants.Add((value, type), local);
+            /// <summary>Looks up the local registered for the given value and type.</summary>
+            public bool TryGetValue(ulong value, OperandType type, out Operand local) =>
+                _constants.TryGetValue((value, type), out local);
+        }
+ /// <summary>
+ /// Walks every operation of every block and rewrites it before register allocation:
+ /// constants are copied to locals where needed, destructive operations get their copies, and
+ /// calls, tail calls, returns, argument loads and compare-and-swap operations are lowered.
+ /// </summary>
+ /// <param name="cctx">Compiler context; provides the control flow graph and the function argument types.</param>
+ /// <param name="stackAlloc">Forwarded to the tail call handler.</param>
+ /// <param name="maxCallArgs">Largest argument count seen on any call, or -1 when no call was found.</param>
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+ Span<Operation> buffer = default;
+ // One slot per argument index; filled lazily by HandleLoadArgument.
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+ for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ // The constant cache is recreated for every block.
+ ConstantDict constants = new ConstantDict();
+ Operation nextNode;
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ // Captured up front because the handlers below insert and remove nodes around the current one.
+ nextNode = node.ListNext;
+ if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+ HandleConstantRegCopy(constants, block.Operations, node);
+ HandleDestructiveRegCopy(block.Operations, node);
+ switch (node.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+ // Source 0 is the call target, so it is not counted.
+ // A call that returns a V128 is counted as having one extra argument.
+ int argsCount = node.SourcesCount - 1;
+ if (node.Destination != default && node.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ HandleCall(constants, block.Operations, node);
+ break;
+ case Instruction.CompareAndSwap:
+ case Instruction.CompareAndSwap16:
+ case Instruction.CompareAndSwap8:
+ // The handler appends nodes after the operation and tells us where to resume.
+ nextNode = HandleCompareAndSwap(block.Operations, node);
+ break;
+ case Instruction.LoadArgument:
+ // The handler may delete the node, so it also decides where to resume.
+ nextNode = HandleLoadArgument(cctx, ref buffer, block.Operations, preservedArgs, node);
+ break;
+ case Instruction.Return:
+ HandleReturn(block.Operations, node);
+ break;
+ case Instruction.Tailcall:
+ HandleTailcall(constants, block.Operations, stackAlloc, node, node);
+ break;
+ }
+ }
+ }
+ }
+ /// <summary>
+ /// Replaces constant sources of <paramref name="node"/> with locals whenever the instruction
+ /// is not reported as accepting a constant at that source position.
+ /// </summary>
+ /// <param name="constants">Cache of integer constants that were already copied to a local.</param>
+ /// <param name="nodes">Operation list that contains <paramref name="node"/>; copies are inserted before it.</param>
+ /// <param name="node">Operation whose sources are inspected and possibly rewritten.</param>
+ private static void HandleConstantRegCopy(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
+ {
+ return;
+ }
+ Instruction inst = node.Instruction;
+ Operand src1 = node.GetSource(0);
+ Operand src2;
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a local.
+ // - Insert a VectorCreateScalar from that local to a new local of the original type.
+ // - Replace the constant use with the new local.
+ src1 = AddFloatConstantCopy(constants, nodes, node, src1);
+ node.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(node, src1.Value))
+ {
+ // Handle integer types.
+ // Most ALU instructions accepts a 32-bits immediate on the second operand.
+ // We need to ensure the following:
+ // - If the constant is on operand 1, we need to move it.
+ // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+ // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+ // - If the constant is on operand 2, we check if the instruction supports it,
+ // if not, we also add a copy. 64-bits constants are usually not supported.
+ if (IsCommutative(node))
+ {
+ src2 = node.GetSource(1);
+ Operand temp = src1;
+ src1 = src2;
+ src2 = temp;
+ node.SetSource(0, src1);
+ node.SetSource(1, src2);
+ }
+ // Still a constant after the optional swap (both sources were constants, or no swap happened).
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddIntConstantCopy(constants, nodes, node, src1);
+ node.SetSource(0, src1);
+ }
+ }
+ }
+ if (node.SourcesCount < 2)
+ {
+ return;
+ }
+ // Re-read source 1: it may have been swapped above.
+ src2 = node.GetSource(1);
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddFloatConstantCopy(constants, nodes, node, src2);
+ node.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst, src2))
+ {
+ src2 = AddIntConstantCopy(constants, nodes, node, src2);
+ node.SetSource(1, src2);
+ }
+ }
+ // For the instructions listed here the sources from index 2 onwards are left untouched,
+ // so a constant there stays a constant.
+ if (node.SourcesCount < 3 ||
+ node.Instruction == Instruction.BranchIf ||
+ node.Instruction == Instruction.Compare ||
+ node.Instruction == Instruction.VectorInsert ||
+ node.Instruction == Instruction.VectorInsert16 ||
+ node.Instruction == Instruction.VectorInsert8)
+ {
+ return;
+ }
+ // Every remaining constant source is unconditionally copied to a local.
+ for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ Operand src = node.GetSource(srcIndex);
+ if (src.Kind == OperandKind.Constant)
+ {
+ if (!src.Type.IsInteger())
+ {
+ src = AddFloatConstantCopy(constants, nodes, node, src);
+ node.SetSource(srcIndex, src);
+ }
+ else
+ {
+ src = AddIntConstantCopy(constants, nodes, node, src);
+ node.SetSource(srcIndex, src);
+ }
+ }
+ }
+ }
+        /// <summary>
+        /// For operations whose destination must be the same operand as their first source,
+        /// inserts the copies needed to make that true.
+        /// </summary>
+        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
+        /// <param name="node">Operation to rewrite.</param>
+        private static void HandleDestructiveRegCopy(IntrusiveList<Operation> nodes, Operation node)
+        {
+            if (node.Destination == default || node.SourcesCount == 0)
+            {
+                return;
+            }
+            Operand destination = node.Destination;
+            Operand firstSource = node.GetSource(0);
+            if (!IsSameOperandDestSrc1(node) || firstSource.Kind != OperandKind.LocalVariable)
+            {
+                return;
+            }
+            // Find out whether the destination is also read through one of the other sources.
+            bool destinationIsRead = false;
+            for (int i = 1; i < node.SourcesCount && !destinationIsRead; i++)
+            {
+                destinationIsRead = node.GetSource(i) == destination;
+            }
+            if (!destinationIsRead)
+            {
+                // Simple case: move the first source into the destination and operate on it in place.
+                nodes.AddBefore(node, Operation(Instruction.Copy, destination, firstSource));
+                node.SetSource(0, destination);
+                return;
+            }
+            // The destination is still needed as an input, so the operation works on a
+            // scratch local and the result is copied to the destination afterwards.
+            Operand scratch = Local(destination.Type);
+            nodes.AddBefore(node, Operation(Instruction.Copy, scratch, firstSource));
+            node.SetSource(0, scratch);
+            nodes.AddAfter(node, Operation(Instruction.Copy, destination, scratch));
+            node.Destination = scratch;
+        }
+ /// <summary>
+ /// Lowers a call: arguments are copied to argument registers or spilled with SpillArg,
+ /// and the result is copied out of the return register(s) after the call.
+ /// </summary>
+ /// <param name="constants">Cache of integer constants that were already copied to a local.</param>
+ /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
+ /// <param name="node">The call operation.</param>
+ private static void HandleCall(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operation operation = node;
+ Operand dest = operation.Destination;
+ // Source 0 (the call target) is kept as the first source of the rewritten call.
+ List<Operand> sources = new List<Operand>
+ {
+ operation.GetSource(0)
+ };
+ int argsCount = operation.SourcesCount - 1;
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+ int intCount = 0;
+ int vecCount = 0;
+ int stackOffset = 0;
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+ bool passOnReg;
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ // A V128 needs two integer registers.
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct, we pass each half on a GPR if possible.
+ // NOTE(review): the two GPRs are not appended to 'sources', unlike the scalar path below; confirm this is intended.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+ continue;
+ }
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+ // The inserted copy may itself have a constant source that needs handling.
+ HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
+ sources.Add(argReg);
+ }
+ else
+ {
+ // No register left: emit a SpillArg at the current offset and advance by the argument size.
+ Operand offset = Const(stackOffset);
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+ HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, spillOp));
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // The V128 result is rebuilt after the call from the two integer return registers.
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+ operation.Destination = default;
+ }
+ else
+ {
+ // The call now writes the return register; a copy after it moves the value to the original destination.
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+ nodes.AddAfter(node, copyOp);
+ operation.Destination = retReg;
+ }
+ }
+ operation.SetSources(sources.ToArray());
+ }
+        /// <summary>
+        /// Lowers a tail call: every argument is moved to the argument register it belongs on, and
+        /// the target address is moved to <c>CodeGenCommon.TcAddressRegister</c>.
+        /// </summary>
+        /// <param name="constants">Cache of integer constants that were already copied to a local.</param>
+        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
+        /// <param name="stackAlloc">Not used by this method.</param>
+        /// <param name="node">Insertion point; all new operations are added before it.</param>
+        /// <param name="operation">The tail call operation whose sources are rewritten.</param>
+        /// <exception cref="NotImplementedException">An argument does not fit on the argument registers.</exception>
+        private static void HandleTailcall(
+            ConstantDict constants,
+            IntrusiveList<Operation> nodes,
+            StackAllocator stackAlloc,
+            Operation node,
+            Operation operation)
+        {
+            List<Operand> sources = new List<Operand>
+            {
+                operation.GetSource(0)
+            };
+            int argsCount = operation.SourcesCount - 1;
+            int intMax = CallingConvention.GetArgumentsOnRegsCount();
+            int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+            int intCount = 0;
+            int vecCount = 0;
+            // Handle arguments passed on registers.
+            for (int index = 0; index < argsCount; index++)
+            {
+                Operand source = operation.GetSource(1 + index);
+                // Same classification as HandleCall: the register class that is checked must be
+                // the one that is consumed below.
+                bool passOnReg;
+                if (source.Type.IsInteger())
+                {
+                    passOnReg = intCount < intMax;
+                }
+                else if (source.Type == OperandType.V128)
+                {
+                    // A V128 takes two integer registers.
+                    passOnReg = intCount + 1 < intMax;
+                }
+                else
+                {
+                    passOnReg = vecCount < vecMax;
+                }
+                if (source.Type == OperandType.V128 && passOnReg)
+                {
+                    // V128 is a struct, we pass each half on a GPR if possible.
+                    Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+                    Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+                    continue;
+                }
+                if (passOnReg)
+                {
+                    Operand argReg = source.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+                    Operation copyOp = Operation(Instruction.Copy, argReg, source);
+                    // The inserted copy may itself have a constant source that needs handling.
+                    HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
+                    sources.Add(argReg);
+                }
+                else
+                {
+                    throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+                }
+            }
+            // The target address is moved to the dedicated tail call address register.
+            // Rationale kept from the original comment: nothing is returned, and the address must
+            // not live on a callee saved register (which would be trashed on the epilogue).
+            Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
+            Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
+            nodes.AddBefore(node, addrCopyOp);
+            sources[0] = tcAddress;
+            operation.SetSources(sources.ToArray());
+        }
+ /// <summary>
+ /// Lowers a compare-and-swap operation: extra destinations are added for temporaries, and
+ /// V128 operands are split into 64-bit halves before the operation and reassembled after it.
+ /// </summary>
+ /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
+ /// <param name="node">The compare-and-swap operation.</param>
+ /// <returns>The operation that follows the last node inserted by this method.</returns>
+ private static Operation HandleCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand expected = node.GetSource(1);
+ if (expected.Type == OperandType.V128)
+ {
+ Operand dest = node.Destination;
+ Operand expectedLow = Local(OperandType.I64);
+ Operand expectedHigh = Local(OperandType.I64);
+ Operand desiredLow = Local(OperandType.I64);
+ Operand desiredHigh = Local(OperandType.I64);
+ Operand actualLow = Local(OperandType.I64);
+ Operand actualHigh = Local(OperandType.I64);
+ Operand address = node.GetSource(0);
+ Operand desired = node.GetSource(2);
+ // Extracts element 0 and element 1 of 'source' into 'low' and 'high', before the CAS operation.
+ void SplitOperand(Operand source, Operand low, Operand high)
+ {
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
+ }
+ SplitOperand(expected, expectedLow, expectedHigh);
+ SplitOperand(desired, desiredLow, desiredHigh);
+ // Keep a reference to the CAS operation: 'node' is advanced below as nodes are appended.
+ Operation operation = node;
+ // Update the sources and destinations with the split 64-bit halves of the whole 128-bit values.
+ // We also need two additional registers that will be used to store temporary information.
+ operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
+ operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operand.
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+ // Assemble the vector with the 64-bit values at the given memory location.
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
+ node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
+ }
+ else
+ {
+ // We need an additional register where the store result will be written to.
+ node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operand.
+ Operation operation = node;
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+ }
+ // 'node' is now the last inserted operation; processing resumes after it.
+ return node.ListNext;
+ }
+        /// <summary>
+        /// Inserts, before a return, the operations that move the returned value to the return register(s).
+        /// </summary>
+        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
+        /// <param name="node">The return operation.</param>
+        private static void HandleReturn(IntrusiveList<Operation> nodes, Operation node)
+        {
+            if (node.SourcesCount == 0)
+            {
+                // Nothing is returned, so there is nothing to move.
+                return;
+            }
+            Operand returned = node.GetSource(0);
+            if (returned.Type != OperandType.V128)
+            {
+                Operand target;
+                if (returned.Type.IsInteger())
+                {
+                    target = Gpr(CallingConvention.GetIntReturnRegister(), returned.Type);
+                }
+                else
+                {
+                    target = Xmm(CallingConvention.GetVecReturnRegister(), returned.Type);
+                }
+                nodes.AddBefore(node, Operation(Instruction.Copy, target, returned));
+                return;
+            }
+            // A V128 goes out as two 64-bit halves on the low and high integer return registers.
+            Operand lowTarget = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+            Operand highTarget = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+            nodes.AddBefore(node, Operation(Instruction.VectorExtract, lowTarget, returned, Const(0)));
+            nodes.AddBefore(node, Operation(Instruction.VectorExtract, highTarget, returned, Const(1)));
+        }
+        /// <summary>
+        /// Lowers a LoadArgument operation. The argument register is copied to a local at the start of
+        /// the entry block (once per argument index), and the LoadArgument is replaced by that local.
+        /// </summary>
+        /// <param name="cctx">Compiler context; provides the entry block and the function argument types.</param>
+        /// <param name="buffer">Scratch buffer used when enumerating the uses of the destination.</param>
+        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
+        /// <param name="preservedArgs">Locals already created for each argument index.</param>
+        /// <param name="node">The LoadArgument operation; source 0 is the constant argument index.</param>
+        /// <returns>The operation the caller should process next.</returns>
+        private static Operation HandleLoadArgument(
+            CompilerContext cctx,
+            ref Span<Operation> buffer,
+            IntrusiveList<Operation> nodes,
+            Operand[] preservedArgs,
+            Operation node)
+        {
+            Operand source = node.GetSource(0);
+            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+            int index = source.AsInt32();
+            int intCount = 0;
+            int vecCount = 0;
+            // Count the registers consumed by the arguments that precede this one.
+            for (int cIndex = 0; cIndex < index; cIndex++)
+            {
+                OperandType argType = cctx.FuncArgTypes[cIndex];
+                if (argType.IsInteger())
+                {
+                    intCount++;
+                }
+                else if (argType == OperandType.V128)
+                {
+                    intCount += 2;
+                }
+                else
+                {
+                    vecCount++;
+                }
+            }
+            Operand dest = node.Destination;
+            // The register class depends on the type of the argument being loaded (the destination),
+            // not on the type of the constant that holds the argument index.
+            bool passOnReg;
+            if (dest.Type.IsInteger())
+            {
+                passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount();
+            }
+            else if (dest.Type == OperandType.V128)
+            {
+                passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount();
+            }
+            else
+            {
+                passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount();
+            }
+            if (!passOnReg)
+            {
+                // TODO: Pass on stack.
+                // The node is left untouched. Returning the following node (rather than the node itself)
+                // keeps the caller's loop moving forward instead of revisiting this node forever.
+                return node.ListNext;
+            }
+            // NOTE(review): preservedArgs is indexed by argument index; confirm the caller sizes it for
+            // functions that mix integer and vector arguments beyond the register count.
+            if (preservedArgs[index] == default)
+            {
+                if (dest.Type == OperandType.V128)
+                {
+                    // V128 is a struct, we pass each half on a GPR if possible.
+                    Operand pArg = Local(OperandType.V128);
+                    Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+                    Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+                    Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+                    Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+                    // Added in reverse so that copyL ends up before copyH at the start of the entry block.
+                    cctx.Cfg.Entry.Operations.AddFirst(copyH);
+                    cctx.Cfg.Entry.Operations.AddFirst(copyL);
+                    preservedArgs[index] = pArg;
+                }
+                else
+                {
+                    Operand pArg = Local(dest.Type);
+                    Operand argReg = dest.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
+                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
+                    Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+                    cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+                    preservedArgs[index] = pArg;
+                }
+            }
+            Operation nextNode;
+            if (dest.AssignmentsCount == 1)
+            {
+                // Let's propagate the argument if we can to avoid copies.
+                Propagate(ref buffer, dest, preservedArgs[index]);
+                nextNode = node.ListNext;
+            }
+            else
+            {
+                // The destination is assigned elsewhere too, so it gets a regular copy,
+                // which is the next operation the caller processes.
+                Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
+                nextNode = nodes.AddBefore(node, argCopyOp);
+            }
+            Delete(nodes, node);
+            return nextNode;
+        }
+        /// <summary>
+        /// Rewrites every use of <paramref name="dest"/> so that it reads <paramref name="value"/> instead,
+        /// including uses as the base address or index of a memory operand.
+        /// </summary>
+        /// <param name="buffer">Scratch buffer handed to <c>GetUses</c>.</param>
+        /// <param name="dest">Operand being replaced.</param>
+        /// <param name="value">Operand that takes its place.</param>
+        private static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value)
+        {
+            ReadOnlySpan<Operation> users = dest.GetUses(ref buffer);
+            foreach (Operation user in users)
+            {
+                for (int i = 0; i < user.SourcesCount; i++)
+                {
+                    Operand current = user.GetSource(i);
+                    if (current == dest)
+                    {
+                        user.SetSource(i, value);
+                        continue;
+                    }
+                    if (current.Kind != OperandKind.Memory)
+                    {
+                        continue;
+                    }
+                    MemoryOperand memory = current.GetMemory();
+                    Operand newBase = memory.BaseAddress;
+                    Operand newIndex = memory.Index;
+                    bool replaceBase = newBase == dest;
+                    bool replaceIndex = newIndex == dest;
+                    if (!replaceBase && !replaceIndex)
+                    {
+                        continue;
+                    }
+                    if (replaceBase)
+                    {
+                        newBase = value;
+                    }
+                    if (replaceIndex)
+                    {
+                        newIndex = value;
+                    }
+                    // The memory operand is rebuilt with the replaced components.
+                    user.SetSource(i, MemoryOp(
+                        current.Type,
+                        newBase,
+                        newIndex,
+                        memory.Scale,
+                        memory.Displacement));
+                }
+            }
+        }
+        /// <summary>
+        /// Materializes a non-integer constant: its integer form is copied to a local, and a
+        /// VectorCreateScalar moves that local into a new local of the constant's type.
+        /// </summary>
+        /// <returns>The new local of the constant's type.</returns>
+        private static Operand AddFloatConstantCopy(
+            ConstantDict constants,
+            IntrusiveList<Operation> nodes,
+            Operation node,
+            Operand source)
+        {
+            Operand result = Local(source.Type);
+            Operand bits = GetIntConst(source);
+            Operand bitsLocal = AddIntConstantCopy(constants, nodes, node, bits);
+            nodes.AddBefore(node, Operation(Instruction.VectorCreateScalar, result, bitsLocal));
+            return result;
+        }
+        /// <summary>
+        /// Returns a local holding the given constant, reusing the cached local for the same
+        /// value and type when there is one, and inserting a copy before <paramref name="node"/> otherwise.
+        /// </summary>
+        private static Operand AddIntConstantCopy(
+            ConstantDict constants,
+            IntrusiveList<Operation> nodes,
+            Operation node,
+            Operand source)
+        {
+            if (!constants.TryGetValue(source.Value, source.Type, out Operand holder))
+            {
+                // First time this (value, type) pair is seen: copy it and remember the local.
+                holder = Local(source.Type);
+                nodes.AddBefore(node, Operation(Instruction.Copy, holder, source));
+                constants.Add(source.Value, source.Type, holder);
+            }
+            return holder;
+        }
+        /// <summary>
+        /// Converts an FP32 or FP64 constant into an integer constant built from its
+        /// <c>AsInt32</c>/<c>AsInt64</c> value; any other operand is returned as is.
+        /// </summary>
+        private static Operand GetIntConst(Operand value)
+        {
+            switch (value.Type)
+            {
+                case OperandType.FP32:
+                    return Const(value.AsInt32());
+                case OperandType.FP64:
+                    return Const(value.AsInt64());
+                default:
+                    return value;
+            }
+        }
+        /// <summary>
+        /// Clears the destination and all sources of <paramref name="node"/>, then removes it from the list.
+        /// </summary>
+        private static void Delete(IntrusiveList<Operation> nodes, Operation node)
+        {
+            node.Destination = default;
+            int i = 0;
+            while (i < node.SourcesCount)
+            {
+                node.SetSource(i, default);
+                i++;
+            }
+            nodes.Remove(node);
+        }
+        /// <summary>Creates an integer register operand with the given index and type.</summary>
+        private static Operand Gpr(int register, OperandType type) => Register(register, RegisterType.Integer, type);
+        /// <summary>Creates a vector register operand with the given index and type.</summary>
+        private static Operand Xmm(int register, OperandType type) => Register(register, RegisterType.Vector, type);
+        /// <summary>
+        /// Tells whether the operation requires its destination to be the same operand as its first source.
+        /// </summary>
+        private static bool IsSameOperandDestSrc1(Operation operation)
+        {
+            Instruction inst = operation.Instruction;
+            if (inst == Instruction.Extended)
+            {
+                // For intrinsics the answer depends on the intrinsic's type.
+                return IsSameOperandDestSrc1(operation.Intrinsic);
+            }
+            return inst == Instruction.VectorInsert ||
+                   inst == Instruction.VectorInsert16 ||
+                   inst == Instruction.VectorInsert8;
+        }
+        /// <summary>
+        /// Tells whether the intrinsic's type is one of those that require the destination
+        /// to be the same operand as the first source.
+        /// </summary>
+        private static bool IsSameOperandDestSrc1(Intrinsic intrinsic)
+        {
+            // The vector type and size bits are masked off before the table lookup.
+            Intrinsic baseIntrinsic = intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask);
+            switch (IntrinsicTable.GetInfo(baseIntrinsic).Type)
+            {
+                case IntrinsicType.ScalarBinaryRd:
+                case IntrinsicType.ScalarTernaryFPRdByElem:
+                case IntrinsicType.ScalarTernaryShlRd:
+                case IntrinsicType.ScalarTernaryShrRd:
+                case IntrinsicType.VectorBinaryRd:
+                case IntrinsicType.VectorInsertByElem:
+                case IntrinsicType.VectorTernaryRd:
+                case IntrinsicType.VectorTernaryRdBitwise:
+                case IntrinsicType.VectorTernaryFPRdByElem:
+                case IntrinsicType.VectorTernaryRdByElem:
+                case IntrinsicType.VectorTernaryShlRd:
+                case IntrinsicType.VectorTernaryShrRd:
+                    return true;
+                default:
+                    return false;
+            }
+        }
+        /// <summary>
+        /// Tells whether the operation may keep a constant with the given value as its first source.
+        /// </summary>
+        /// <param name="node">Operation being inspected.</param>
+        /// <param name="value">Value of the constant found on source 0.</param>
+        private static bool HasConstSrc1(Operation node, ulong value)
+        {
+            switch (node.Instruction)
+            {
+                case Instruction.Copy:
+                case Instruction.LoadArgument:
+                case Instruction.Spill:
+                case Instruction.SpillArg:
+                    return true;
+                case Instruction.Add:
+                case Instruction.BranchIf:
+                case Instruction.Compare:
+                case Instruction.Subtract:
+                    // The immediate encoding of those instructions does not allow Rn to be
+                    // XZR (it will be SP instead), so we can't allow a Rn constant in this case.
+                    return value == 0 && NotConstOrConst0(node.GetSource(1));
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseNot:
+                case Instruction.BitwiseOr:
+                case Instruction.ByteSwap:
+                case Instruction.CountLeadingZeros:
+                case Instruction.Multiply:
+                case Instruction.Negate:
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                case Instruction.Extended:
+                    // Only a zero constant is accepted here.
+                    return value == 0;
+                default:
+                    return false;
+            }
+        }
+        /// <summary>
+        /// Returns false only for a constant operand whose value is not zero.
+        /// </summary>
+        private static bool NotConstOrConst0(Operand operand)
+        {
+            bool isNonZeroConstant = operand.Kind == OperandKind.Constant && operand.Value != 0;
+            return !isNonZeroConstant;
+        }
+        /// <summary>
+        /// Tells whether the instruction may keep the given constant as its second source.
+        /// </summary>
+        /// <param name="inst">Instruction of the operation being inspected.</param>
+        /// <param name="operand">Constant found on source 1.</param>
+        private static bool HasConstSrc2(Instruction inst, Operand operand)
+        {
+            ulong constant = operand.Value;
+            switch (inst)
+            {
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                case Instruction.VectorExtract:
+                case Instruction.VectorExtract16:
+                case Instruction.VectorExtract8:
+                    return true;
+                case Instruction.Extended:
+                    // TODO: Check if actual intrinsic is supposed to have consts here?
+                    // Right now we only hit this case for fixed-point int <-> FP conversion instructions.
+                    return true;
+                case Instruction.Add:
+                case Instruction.BranchIf:
+                case Instruction.Compare:
+                case Instruction.Subtract:
+                    return ConstFitsOnUImm12Sh(constant);
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseOr:
+                    return constant == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _);
+                case Instruction.Multiply:
+                case Instruction.Store:
+                case Instruction.Store16:
+                case Instruction.Store8:
+                    return constant == 0;
+                default:
+                    return false;
+            }
+        }
+        /// <summary>
+        /// Tells whether the first two sources of the operation can be swapped.
+        /// Comparisons qualify only when they test for equality or inequality.
+        /// </summary>
+        private static bool IsCommutative(Operation operation)
+        {
+            Instruction inst = operation.Instruction;
+            if (inst == Instruction.BranchIf || inst == Instruction.Compare)
+            {
+                // Source 2 holds the comparison kind as a constant.
+                Operand comp = operation.GetSource(2);
+                Debug.Assert(comp.Kind == OperandKind.Constant);
+                Comparison kind = (Comparison)comp.AsInt32();
+                return kind == Comparison.Equal || kind == Comparison.NotEqual;
+            }
+            return inst == Instruction.Add ||
+                   inst == Instruction.BitwiseAnd ||
+                   inst == Instruction.BitwiseExclusiveOr ||
+                   inst == Instruction.BitwiseOr ||
+                   inst == Instruction.Multiply;
+        }
+        /// <summary>
+        /// Tells whether all set bits of <paramref name="value"/> lie within bits 0-11, or all within bits 12-23.
+        /// </summary>
+        private static bool ConstFitsOnUImm12Sh(ulong value)
+        {
+            const ulong LowBits = 0xfffUL;
+            const ulong ShiftedBits = 0xfffUL << 12;
+            bool fitsUnshifted = (value & ~LowBits) == 0;
+            return fitsUnshifted || (value & ~ShiftedBits) == 0;
+        }
+        /// <summary>
+        /// Tells whether the operation is an intrinsic whose constant sources are left as they are.
+        /// </summary>
+        private static bool IsIntrinsicWithConst(Operation operation)
+        {
+            if (!IsIntrinsic(operation.Instruction))
+            {
+                return false;
+            }
+            // The vector type and size bits are masked off before the table lookup.
+            Intrinsic baseIntrinsic = operation.Intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask);
+            switch (IntrinsicTable.GetInfo(baseIntrinsic).Type)
+            {
+                // Those have integer inputs that don't support consts.
+                case IntrinsicType.ScalarFPConvGpr:
+                case IntrinsicType.ScalarFPConvFixedGpr:
+                case IntrinsicType.SetRegister:
+                    return false;
+                default:
+                    return true;
+            }
+        }
+        /// <summary>Tells whether the instruction is <c>Instruction.Extended</c>.</summary>
+        private static bool IsIntrinsic(Instruction inst) => inst == Instruction.Extended;
+ }
diff --git a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
index 0423c255..c5a22a53 100644
--- a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
+++ b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
@@ -90,6 +90,47 @@ namespace ARMeilleure.CodeGen.Optimizations
+ case Instruction.Compare:
+ if (type == OperandType.I32 &&
+ operation.GetSource(0).Type == type &&
+ operation.GetSource(1).Type == type)
+ {
+ switch ((Comparison)operation.GetSource(2).Value)
+ {
+ case Comparison.Equal:
+ EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0);
+ break;
+ case Comparison.NotEqual:
+ EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0);
+ break;
+ case Comparison.Greater:
+ EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0);
+ break;
+ case Comparison.LessOrEqual:
+ EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0);
+ break;
+ case Comparison.GreaterUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0);
+ break;
+ case Comparison.LessOrEqualUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0);
+ break;
+ case Comparison.GreaterOrEqual:
+ EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0);
+ break;
+ case Comparison.Less:
+ EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0);
+ break;
+ case Comparison.GreaterOrEqualUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0);
+ break;
+ case Comparison.LessUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0);
+ break;
+ }
+ }
+ break;
case Instruction.Copy:
if (type == OperandType.I32)
diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
index 919e996b..a45bb455 100644
--- a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
+++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -44,8 +44,8 @@ namespace ARMeilleure.CodeGen.Optimizations
- if (DestIsLocalVar(node))
- {
+ if (DestIsSingleLocalVar(node))
+ {
if (IsPropagableCompare(node))
modified |= PropagateCompare(ref buffer, node);
@@ -99,20 +99,6 @@ namespace ARMeilleure.CodeGen.Optimizations
while (modified);
- private static Span<Operation> GetUses(ref Span<Operation> buffer, Operand operand)
- {
- ReadOnlySpan<Operation> uses = operand.Uses;
- if (buffer.Length < uses.Length)
- {
- buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length);
- }
- uses.CopyTo(buffer);
- return buffer.Slice(0, uses.Length);
- }
private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp)
// Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form
@@ -160,7 +146,7 @@ namespace ARMeilleure.CodeGen.Optimizations
Comparison compType = (Comparison)comp.AsInt32();
- Span<Operation> uses = GetUses(ref buffer, dest);
+ Span<Operation> uses = dest.GetUses(ref buffer);
foreach (Operation use in uses)
@@ -199,7 +185,7 @@ namespace ARMeilleure.CodeGen.Optimizations
Operand dest = copyOp.Destination;
Operand source = copyOp.GetSource(0);
- Span<Operation> uses = GetUses(ref buffer, dest);
+ Span<Operation> uses = dest.GetUses(ref buffer);
foreach (Operation use in uses)
@@ -231,12 +217,12 @@ namespace ARMeilleure.CodeGen.Optimizations
private static bool IsUnused(Operation node)
- return DestIsLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
+ return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
- private static bool DestIsLocalVar(Operation node)
+ private static bool DestIsSingleLocalVar(Operation node)
- return node.Destination != default && node.Destination.Kind == OperandKind.LocalVariable;
+ return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;
private static bool HasSideEffects(Operation node)
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
index d8a40365..6ea62c28 100644
--- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -17,8 +17,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
private const int InstructionGap = 2;
private const int InstructionGapMask = InstructionGap - 1;
- private const int RegistersCount = 16;
private HashSet<int> _blockEdges;
private LiveRange[] _blockRanges;
private BitMap[] _blockLiveIn;
@@ -59,7 +57,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
void PopulateFreePositions(RegisterType type, out int[] positions, out int count)
- positions = new int[RegistersCount];
+ positions = new int[masks.RegistersCount];
count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type));
int mask = masks.GetAvailableRegisters(type);
@@ -115,7 +113,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
StackAllocator stackAlloc,
RegisterMasks regMasks)
- NumberLocals(cfg);
+ NumberLocals(cfg, regMasks.RegistersCount);
var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
@@ -134,22 +132,25 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
- if (current.Register.Type == RegisterType.Integer)
- {
- context.IntUsedRegisters |= 1 << current.Register.Index;
- }
- else /* if (interval.Register.Type == RegisterType.Vector) */
+ if (current.IsFixedAndUsed)
- context.VecUsedRegisters |= 1 << current.Register.Index;
+ if (current.Register.Type == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << current.Register.Index;
+ }
+ else /* if (interval.Register.Type == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << current.Register.Index;
+ }
- AllocateInterval(context, current, index);
+ AllocateInterval(context, current, index, regMasks.RegistersCount);
- for (int index = RegistersCount * 2; index < _intervals.Count; index++)
+ for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++)
if (!_intervals[index].IsSpilled)
@@ -163,7 +164,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize);
- private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
+ private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
// Check active intervals that already ended.
foreach (int iIndex in context.Active)
@@ -199,17 +200,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
- if (!TryAllocateRegWithoutSpill(context, current, cIndex))
+ if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount))
- AllocateRegWithSpill(context, current, cIndex);
+ AllocateRegWithSpill(context, current, cIndex, registersCount);
- private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex)
+ private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
RegisterType regType = current.Local.Type.ToRegisterType();
- Span<int> freePositions = stackalloc int[RegistersCount];
+ Span<int> freePositions = stackalloc int[registersCount];
context.GetFreePositions(regType, freePositions, out int freePositionsCount);
@@ -278,7 +279,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
- InsertInterval(splitChild);
+ InsertInterval(splitChild, registersCount);
@@ -302,12 +303,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return true;
- private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex)
+ private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
RegisterType regType = current.Local.Type.ToRegisterType();
- Span<int> usePositions = stackalloc int[RegistersCount];
- Span<int> blockedPositions = stackalloc int[RegistersCount];
+ Span<int> usePositions = stackalloc int[registersCount];
+ Span<int> blockedPositions = stackalloc int[registersCount];
context.GetFreePositions(regType, usePositions, out _);
context.GetFreePositions(regType, blockedPositions, out _);
@@ -386,7 +387,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
- InsertInterval(splitChild);
+ InsertInterval(splitChild, registersCount);
Spill(context, current);
@@ -396,7 +397,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
// so we only need to split the intervals using the selected register.
current.Register = new Register(selectedReg, regType);
- SplitAndSpillOverlappingIntervals(context, current);
+ SplitAndSpillOverlappingIntervals(context, current, registersCount);
@@ -417,14 +418,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
- InsertInterval(splitChild);
+ InsertInterval(splitChild, registersCount);
Spill(context, splitChild);
- SplitAndSpillOverlappingIntervals(context, current);
+ SplitAndSpillOverlappingIntervals(context, current, registersCount);
@@ -460,7 +461,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return selected;
- private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current)
+ private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount)
foreach (int iIndex in context.Active)
@@ -468,7 +469,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
if (!interval.IsFixed && interval.Register == current.Register)
- SplitAndSpillOverlappingInterval(context, current, interval);
+ SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
@@ -480,7 +481,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
- SplitAndSpillOverlappingInterval(context, current, interval);
+ SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
@@ -490,7 +491,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
private void SplitAndSpillOverlappingInterval(
AllocationContext context,
LiveInterval current,
- LiveInterval interval)
+ LiveInterval interval,
+ int registersCount)
// If there's a next use after the start of the current interval,
// we need to split the spilled interval twice, and re-insert it
@@ -522,7 +524,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
splitChild = right;
- InsertInterval(splitChild);
+ InsertInterval(splitChild, registersCount);
@@ -530,13 +532,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
- private void InsertInterval(LiveInterval interval)
+ private void InsertInterval(LiveInterval interval, int registersCount)
Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses.");
Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval.");
Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");
- int startIndex = RegistersCount * 2;
+ int startIndex = registersCount * 2;
int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);
@@ -790,12 +792,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return _operationNodes[position / InstructionGap];
- private void NumberLocals(ControlFlowGraph cfg)
+ private void NumberLocals(ControlFlowGraph cfg, int registersCount)
_operationNodes = new List<(IntrusiveList<Operation>, Operation)>();
_intervals = new List<LiveInterval>();
- for (int index = 0; index < RegistersCount; index++)
+ for (int index = 0; index < registersCount; index++)
_intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
_intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
@@ -1041,6 +1043,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
LiveInterval interval = _intervals[GetOperandId(dest)];
+ if (interval.IsFixed)
+ {
+ interval.IsFixedAndUsed = true;
+ }
interval.SetStart(operationPos + 1);
interval.AddUsePosition(operationPos + 1);
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
index 77ad9541..d739ad28 100644
--- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
@@ -27,6 +27,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
public Register Register;
public bool IsFixed;
+ public bool IsFixedAndUsed;
private readonly Data* _data;
@@ -44,6 +45,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
public ref int SpillOffset => ref _data->SpillOffset;
public bool IsFixed => _data->IsFixed;
+ public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed;
public bool IsEmpty => FirstRange == default;
public bool IsSplit => Children.Count != 0;
public bool IsSpilled => SpillOffset != -1;
@@ -114,7 +116,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
- FirstRange = new LiveRange(position, position + 1);
+ FirstRange = new LiveRange(position, position + 1);
End = position + 1;
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
index 5b11aac2..bc948f95 100644
--- a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
+++ b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
@@ -11,6 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
public int VecCallerSavedRegisters { get; }
public int IntCalleeSavedRegisters { get; }
public int VecCalleeSavedRegisters { get; }
+ public int RegistersCount { get; }
public RegisterMasks(
int intAvailableRegisters,
@@ -18,7 +19,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
int intCallerSavedRegisters,
int vecCallerSavedRegisters,
int intCalleeSavedRegisters,
- int vecCalleeSavedRegisters)
+ int vecCalleeSavedRegisters,
+ int registersCount)
IntAvailableRegisters = intAvailableRegisters;
VecAvailableRegisters = vecAvailableRegisters;
@@ -26,6 +28,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
VecCallerSavedRegisters = vecCallerSavedRegisters;
IntCalleeSavedRegisters = intCalleeSavedRegisters;
VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+ RegistersCount = registersCount;
public int GetAvailableRegisters(RegisterType type)
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
index e589da14..8b5a3fc5 100644
--- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -16,6 +16,7 @@ namespace ARMeilleure.CodeGen.X86
static class CodeGenerator
+ private const int RegistersCount = 16;
private const int PageSize = 0x1000;
private const int StackGuardSize = 0x2000;
@@ -143,7 +144,8 @@ namespace ARMeilleure.CodeGen.X86
- CallingConvention.GetVecCalleeSavedRegisters());
+ CallingConvention.GetVecCalleeSavedRegisters(),
+ RegistersCount);
AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index 6407a9a7..8c909ac1 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -5,8 +5,6 @@ namespace ARMeilleure.CodeGen.X86
static class IntrinsicTable
- private const int BadOp = 0;
private static IntrinsicInfo[] _intrinTable;
static IntrinsicTable()