diff options
author | gdkchan <gab.dark.100@gmail.com> | 2019-08-08 15:56:22 -0300 |
---|---|---|
committer | emmauss <emmausssss@gmail.com> | 2019-08-08 21:56:22 +0300 |
commit | a731ab3a2aad56e6ceb8b4e2444a61353246295c (patch) | |
tree | c7f13f51bfec6b19431e62167811ae31e9d2fea9 /ARMeilleure/CodeGen | |
parent | 1ba58e9942e54175e3f3a0e1d57a48537f4888b1 (diff) |
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depend on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD)
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
Diffstat (limited to 'ARMeilleure/CodeGen')
28 files changed, 8033 insertions, 0 deletions
// File: ARMeilleure/CodeGen/CompiledFunction.cs (new file in this commit)
using ARMeilleure.CodeGen.Unwinding;

namespace ARMeilleure.CodeGen
{
    /// <summary>
    /// Immutable pair holding the code bytes of a compiled function
    /// together with its associated unwind information.
    /// </summary>
    struct CompiledFunction
    {
        private readonly byte[] _code;
        private readonly UnwindInfo _unwindInfo;

        /// <summary>Code bytes passed to the constructor.</summary>
        public byte[] Code => _code;

        /// <summary>Unwind information passed to the constructor.</summary>
        public UnwindInfo UnwindInfo => _unwindInfo;

        /// <summary>Creates a new instance from the given code and unwind information.</summary>
        /// <param name="code">Code bytes of the function</param>
        /// <param name="unwindInfo">Unwind information of the function</param>
        public CompiledFunction(byte[] code, UnwindInfo unwindInfo)
        {
            _code = code;
            _unwindInfo = unwindInfo;
        }
    }
}
// File: ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs (new file in this commit)
using ARMeilleure.IntermediateRepresentation;
using System;

using static ARMeilleure.IntermediateRepresentation.OperandHelper;

namespace ARMeilleure.CodeGen.Optimizations
{
    /// <summary>
    /// Replaces integer operations whose sources are all constants with a copy
    /// of the pre-computed constant result.
    /// </summary>
    static class ConstantFolding
    {
        /// <summary>
        /// Tries to fold <paramref name="operation"/>. Nothing happens when the operation
        /// has no destination, has no sources, has a non-constant source, or when the
        /// destination type is neither I32 nor I64.
        /// </summary>
        /// <param name="operation">Operation to be folded in place</param>
        public static void RunPass(Operation operation)
        {
            if (operation.Destination == null || operation.SourcesCount == 0)
            {
                return;
            }

            if (!AreAllSourcesConstant(operation))
            {
                return;
            }

            OperandType type = operation.Destination.Type;

            switch (operation.Instruction)
            {
                case Instruction.Add:
                    EvaluateBinary(operation, type, (x, y) => x + y, (x, y) => x + y);
                    break;

                case Instruction.BitwiseAnd:
                    EvaluateBinary(operation, type, (x, y) => x & y, (x, y) => x & y);
                    break;

                case Instruction.BitwiseExclusiveOr:
                    EvaluateBinary(operation, type, (x, y) => x ^ y, (x, y) => x ^ y);
                    break;

                case Instruction.BitwiseNot:
                    EvaluateUnary(operation, type, (x) => ~x, (x) => ~x);
                    break;

                case Instruction.BitwiseOr:
                    EvaluateBinary(operation, type, (x, y) => x | y, (x, y) => x | y);
                    break;

                case Instruction.Copy:
                    EvaluateUnary(operation, type, (x) => x, (x) => x);
                    break;

                case Instruction.Divide:
                    EvaluateBinary(operation, type, DivideI32, DivideI64);
                    break;

                case Instruction.DivideUI:
                    EvaluateBinary(
                        operation,
                        type,
                        (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0,
                        (x, y) => y != 0 ? (long)((ulong)x / (ulong)y) : 0);
                    break;

                case Instruction.Multiply:
                    EvaluateBinary(operation, type, (x, y) => x * y, (x, y) => x * y);
                    break;

                case Instruction.Negate:
                    EvaluateUnary(operation, type, (x) => -x, (x) => -x);
                    break;

                case Instruction.ShiftLeft:
                    EvaluateBinary(operation, type, (x, y) => x << y, (x, y) => x << (int)y);
                    break;

                case Instruction.ShiftRightSI:
                    EvaluateBinary(operation, type, (x, y) => x >> y, (x, y) => x >> (int)y);
                    break;

                case Instruction.ShiftRightUI:
                    EvaluateBinary(
                        operation,
                        type,
                        (x, y) => (int)((uint)x >> y),
                        (x, y) => (long)((ulong)x >> (int)y));
                    break;

                case Instruction.SignExtend16:
                    EvaluateUnary(operation, type, (x) => (short)x, (x) => (short)x);
                    break;

                case Instruction.SignExtend32:
                    EvaluateUnary(operation, type, (x) => x, (x) => (int)x);
                    break;

                case Instruction.SignExtend8:
                    EvaluateUnary(operation, type, (x) => (sbyte)x, (x) => (sbyte)x);
                    break;

                case Instruction.Subtract:
                    EvaluateBinary(operation, type, (x, y) => x - y, (x, y) => x - y);
                    break;
            }
        }

        /// <summary>
        /// Signed 32-bit division used for folding. A zero divisor yields zero.
        /// A divisor of -1 is computed as a wrapping negation, because evaluating
        /// <c>int.MinValue / -1</c> on the host would throw instead of producing a value.
        /// </summary>
        private static int DivideI32(int x, int y)
        {
            if (y == 0)
            {
                return 0;
            }

            if (y == -1)
            {
                return unchecked(-x);
            }

            return x / y;
        }

        /// <summary>
        /// Signed 64-bit division used for folding. A zero divisor yields zero.
        /// A divisor of -1 is computed as a wrapping negation, because evaluating
        /// <c>long.MinValue / -1</c> on the host would throw instead of producing a value.
        /// </summary>
        private static long DivideI64(long x, long y)
        {
            if (y == 0)
            {
                return 0;
            }

            if (y == -1)
            {
                return unchecked(-x);
            }

            return x / y;
        }

        /// <summary>Returns true when every source operand of the operation is a constant.</summary>
        private static bool AreAllSourcesConstant(Operation operation)
        {
            for (int index = 0; index < operation.SourcesCount; index++)
            {
                if (operation.GetSource(index).Kind != OperandKind.Constant)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Folds a one-source operation using the evaluator matching <paramref name="type"/>.
        /// Types other than I32 and I64 are left untouched.
        /// </summary>
        private static void EvaluateUnary(
            Operation operation,
            OperandType type,
            Func<int, int> opI32,
            Func<long, long> opI64)
        {
            if (type == OperandType.I32)
            {
                EvaluateUnaryI32(operation, opI32);
            }
            else if (type == OperandType.I64)
            {
                EvaluateUnaryI64(operation, opI64);
            }
        }

        /// <summary>
        /// Folds a two-source operation using the evaluator matching <paramref name="type"/>.
        /// Types other than I32 and I64 are left untouched.
        /// </summary>
        private static void EvaluateBinary(
            Operation operation,
            OperandType type,
            Func<int, int, int> opI32,
            Func<long, long, long> opI64)
        {
            if (type == OperandType.I32)
            {
                EvaluateBinaryI32(operation, opI32);
            }
            else if (type == OperandType.I64)
            {
                EvaluateBinaryI64(operation, opI64);
            }
        }

        private static void EvaluateUnaryI32(Operation operation, Func<int, int> op)
        {
            int x = operation.GetSource(0).AsInt32();

            operation.TurnIntoCopy(Const(op(x)));
        }

        private static void EvaluateUnaryI64(Operation operation, Func<long, long> op)
        {
            long x = operation.GetSource(0).AsInt64();

            operation.TurnIntoCopy(Const(op(x)));
        }

        private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op)
        {
            int x = operation.GetSource(0).AsInt32();
            int y = operation.GetSource(1).AsInt32();

            operation.TurnIntoCopy(Const(op(x, y)));
        }

        private static void EvaluateBinaryI64(Operation operation, Func<long, long, long> op)
        {
            long x = operation.GetSource(0).AsInt64();
            long y = operation.GetSource(1).AsInt64();

            operation.TurnIntoCopy(Const(op(x, y)));
        }
    }
}
// File: ARMeilleure/CodeGen/Optimizations/Optimizer.cs (new file in this commit)
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace ARMeilleure.CodeGen.Optimizations
{
    /// <summary>
    /// Drives the IR optimizations: removal of unused nodes, constant folding,
    /// simplification and copy propagation.
    /// </summary>
    static class Optimizer
    {
        /// <summary>
        /// Walks every block of <paramref name="cfg"/> repeatedly, until a full walk
        /// removes no node.
        /// </summary>
        /// <param name="cfg">Control flow graph to optimize in place</param>
        public static void RunPass(ControlFlowGraph cfg)
        {
            bool changed = true;

            while (changed)
            {
                changed = false;

                foreach (BasicBlock block in cfg.Blocks)
                {
                    LinkedListNode<Node> current = block.Operations.First;

                    while (current != null)
                    {
                        // Grab the successor first, as the current node may be unlinked below.
                        LinkedListNode<Node> following = current.Next;

                        Node value = current.Value;

                        if (IsUnused(value))
                        {
                            RemoveNode(block, current);

                            changed = true;
                        }
                        else if (value is Operation operation)
                        {
                            ConstantFolding.RunPass(operation);

                            Simplification.RunPass(operation);

                            if (DestIsLocalVar(operation) && IsPropagableCopy(operation))
                            {
                                PropagateCopy(operation);

                                RemoveNode(block, current);

                                changed = true;
                            }
                        }

                        current = following;
                    }
                }
            }
        }

        /// <summary>
        /// Rewrites every node using the destination of <paramref name="copyOp"/>
        /// so that it uses the copy source directly.
        /// </summary>
        private static void PropagateCopy(Operation copyOp)
        {
            Operand target = copyOp.Destination;
            Operand replacement = copyOp.GetSource(0);

            // Iterate over a snapshot of the use list while the sources are being replaced.
            foreach (Node user in target.Uses.ToArray())
            {
                for (int slot = 0; slot < user.SourcesCount; slot++)
                {
                    if (user.GetSource(slot) != target)
                    {
                        continue;
                    }

                    user.SetSource(slot, replacement);
                }
            }
        }

        /// <summary>
        /// Unlinks the node from the block operation list, then clears all of its
        /// sources and its destination.
        /// </summary>
        private static void RemoveNode(BasicBlock block, LinkedListNode<Node> llNode)
        {
            block.Operations.Remove(llNode);

            Node removed = llNode.Value;

            for (int slot = 0; slot < removed.SourcesCount; slot++)
            {
                removed.SetSource(slot, null);
            }

            Debug.Assert(removed.Destination == null || removed.Destination.Uses.Count == 0);

            removed.Destination = null;
        }

        /// <summary>
        /// True when the node writes a local variable that has no uses,
        /// and the node has no side effects.
        /// </summary>
        private static bool IsUnused(Node node)
        {
            if (!DestIsLocalVar(node))
            {
                return false;
            }

            if (node.Destination.Uses.Count != 0)
            {
                return false;
            }

            return !HasSideEffects(node);
        }

        /// <summary>True when the node has a destination of the local variable kind.</summary>
        private static bool DestIsLocalVar(Node node)
        {
            Operand dest = node.Destination;

            return dest != null && dest.Kind == OperandKind.LocalVariable;
        }

        /// <summary>Only call operations are treated as having side effects.</summary>
        private static bool HasSideEffects(Node node)
        {
            return node is Operation operation && operation.Instruction == Instruction.Call;
        }

        /// <summary>
        /// True for copy operations where destination and source have the same type.
        /// </summary>
        private static bool IsPropagableCopy(Operation operation)
        {
            return operation.Instruction == Instruction.Copy &&
                   operation.Destination.Type == operation.GetSource(0).Type;
        }
    }
}
// File: ARMeilleure/CodeGen/Optimizations/Simplification.cs (new file in this commit)
using ARMeilleure.IntermediateRepresentation;
using System;

using static ARMeilleure.IntermediateRepresentation.OperandHelper;

namespace ARMeilleure.CodeGen.Optimizations
{
    /// <summary>
    /// Replaces operations with an identity or absorbing constant operand
    /// (such as x + 0, x * 1, x &amp; 0) with a plain copy.
    /// </summary>
    static class Simplification
    {
        /// <summary>
        /// Tries to turn <paramref name="operation"/> into a copy of one of its sources.
        /// Instructions not handled here are left untouched.
        /// </summary>
        /// <param name="operation">Operation to be simplified in place</param>
        public static void RunPass(Operation operation)
        {
            switch (operation.Instruction)
            {
                case Instruction.Add:
                case Instruction.BitwiseExclusiveOr:
                    TryEliminateBinaryOpCommutative(operation, 0);
                    break;

                case Instruction.BitwiseAnd:
                    TryEliminateBitwiseAnd(operation);
                    break;

                case Instruction.BitwiseOr:
                    TryEliminateBitwiseOr(operation);
                    break;

                case Instruction.ConditionalSelect:
                    TryEliminateConditionalSelect(operation);
                    break;

                case Instruction.Divide:
                    TryEliminateBinaryOpY(operation, 1);
                    break;

                case Instruction.Multiply:
                    TryEliminateBinaryOpCommutative(operation, 1);
                    break;

                case Instruction.ShiftLeft:
                case Instruction.ShiftRightSI:
                case Instruction.ShiftRightUI:
                case Instruction.Subtract:
                    TryEliminateBinaryOpY(operation, 0);
                    break;
            }
        }

        private static void TryEliminateBitwiseAnd(Operation operation)
        {
            // Try to recognize and optimize those 4 patterns (in order):
            // 0xFFFFFFFF & y == y,          x & 0xFFFFFFFF == x,
            // 0x00000000 & y == 0x00000000, x & 0x00000000 == 0x00000000
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (IsConstAllOnes(x))
            {
                operation.TurnIntoCopy(y);
            }
            else if (IsConstAllOnes(y))
            {
                operation.TurnIntoCopy(x);
            }
            else if (IsConstEqual(x, 0))
            {
                // Reuse the zero operand itself, so the result keeps the operand
                // type instead of always becoming a constant built from an int.
                operation.TurnIntoCopy(x);
            }
            else if (IsConstEqual(y, 0))
            {
                operation.TurnIntoCopy(y);
            }
        }

        private static void TryEliminateBitwiseOr(Operation operation)
        {
            // Try to recognize and optimize those 4 patterns (in order):
            // 0x00000000 | y == y,          x | 0x00000000 == x,
            // 0xFFFFFFFF | y == 0xFFFFFFFF, x | 0xFFFFFFFF == 0xFFFFFFFF
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (IsConstEqual(x, 0))
            {
                operation.TurnIntoCopy(y);
            }
            else if (IsConstEqual(y, 0))
            {
                operation.TurnIntoCopy(x);
            }
            else if (IsConstAllOnes(x))
            {
                // Reuse the all ones operand itself, so the result keeps the operand
                // type instead of always becoming a constant built from an ulong.
                operation.TurnIntoCopy(x);
            }
            else if (IsConstAllOnes(y))
            {
                operation.TurnIntoCopy(y);
            }
        }

        /// <summary>
        /// Turns "x op y" into a copy of x when y is the constant <paramref name="comparand"/>.
        /// </summary>
        private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
        {
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (IsConstEqual(y, comparand))
            {
                operation.TurnIntoCopy(x);
            }
        }

        /// <summary>
        /// Turns "x op y" into a copy of the other operand when either x or y
        /// is the constant <paramref name="comparand"/>.
        /// </summary>
        private static void TryEliminateBinaryOpCommutative(Operation operation, ulong comparand)
        {
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (IsConstEqual(x, comparand))
            {
                operation.TurnIntoCopy(y);
            }
            else if (IsConstEqual(y, comparand))
            {
                operation.TurnIntoCopy(x);
            }
        }

        private static void TryEliminateConditionalSelect(Operation operation)
        {
            Operand cond = operation.GetSource(0);

            if (cond.Kind != OperandKind.Constant)
            {
                return;
            }

            // The condition is constant, we can turn it into a copy, and select
            // the source based on the condition value.
            int srcIndex = cond.Value != 0 ? 1 : 2;

            Operand source = operation.GetSource(srcIndex);

            operation.TurnIntoCopy(source);
        }

        /// <summary>
        /// True when the operand is an integer constant with the value <paramref name="comparand"/>.
        /// </summary>
        private static bool IsConstEqual(Operand operand, ulong comparand)
        {
            if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger())
            {
                return false;
            }

            return operand.Value == comparand;
        }

        /// <summary>
        /// True when the operand is an integer constant with all bits of its type set.
        /// The kind and type are checked before <see cref="AllOnes"/> is called, so
        /// operands of non-integer types are rejected rather than causing an exception.
        /// </summary>
        private static bool IsConstAllOnes(Operand operand)
        {
            if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger())
            {
                return false;
            }

            return operand.Value == AllOnes(operand.Type);
        }

        /// <summary>Returns the value with all bits set for the I32 or I64 type.</summary>
        /// <exception cref="ArgumentException">The type is neither I32 nor I64</exception>
        private static ulong AllOnes(OperandType type)
        {
            switch (type)
            {
                case OperandType.I32: return ~0U;
                case OperandType.I64: return ~0UL;
            }

            throw new ArgumentException("Invalid operand type \"" + type + "\".");
        }
    }
}
// File: ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs (new file in this commit)
namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Immutable set of values returned by a register allocation pass.
    /// </summary>
    struct AllocationResult
    {
        private readonly int _intUsedRegisters;
        private readonly int _vecUsedRegisters;
        private readonly int _spillRegionSize;

        /// <summary>Value given for the used integer registers.</summary>
        public int IntUsedRegisters => _intUsedRegisters;

        /// <summary>Value given for the used vector registers.</summary>
        public int VecUsedRegisters => _vecUsedRegisters;

        /// <summary>Value given for the size of the spill region.</summary>
        public int SpillRegionSize => _spillRegionSize;

        /// <summary>Creates a new result holding the three given values.</summary>
        public AllocationResult(int intUsedRegisters, int vecUsedRegisters, int spillRegionSize)
        {
            _intUsedRegisters = intUsedRegisters;
            _vecUsedRegisters = vecUsedRegisters;
            _spillRegionSize = spillRegionSize;
        }
    }
}
// File: ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs (new file in this commit)
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Collects the moves needed where a live interval was split (stack to register,
    /// register to stack and register to register), and turns them into a sequence
    /// of operations.
    /// </summary>
    class CopyResolver
    {
        /// <summary>
        /// Set of register to register copies that are sequenced together, so that
        /// a copy does not overwrite a register that is still the source of another one.
        /// </summary>
        private class ParallelCopy
        {
            private struct Copy
            {
                public Register Dest { get; }
                public Register Source { get; }

                public OperandType Type { get; }

                public Copy(Register dest, Register source, OperandType type)
                {
                    Dest = dest;
                    Source = source;
                    Type = type;
                }
            }

            private readonly List<Copy> _copies;

            public int Count => _copies.Count;

            public ParallelCopy()
            {
                _copies = new List<Copy>();
            }

            public void AddCopy(Register dest, Register source, OperandType type)
            {
                _copies.Add(new Copy(dest, source, type));
            }

            /// <summary>
            /// Appends to <paramref name="sequence"/> the operations that perform all
            /// the copies added so far. Copies whose destination is not the source of
            /// another copy are emitted as plain copies, the remaining ones are
            /// resolved with XOR swaps.
            /// </summary>
            public void Sequence(List<Operation> sequence)
            {
                // Current location of the value that was originally on each source register.
                Dictionary<Register, Register> locations = new Dictionary<Register, Register>();
                // Original source register of each destination register.
                Dictionary<Register, Register> sources = new Dictionary<Register, Register>();

                Dictionary<Register, OperandType> types = new Dictionary<Register, OperandType>();

                Queue<Register> pendingQueue = new Queue<Register>();
                Queue<Register> readyQueue = new Queue<Register>();

                foreach (Copy copy in _copies)
                {
                    locations[copy.Source] = copy.Source;
                    sources[copy.Dest] = copy.Source;
                    types[copy.Dest] = copy.Type;

                    pendingQueue.Enqueue(copy.Dest);
                }

                foreach (Copy copy in _copies)
                {
                    // If the destination is not used anywhere, we can assign it immediately.
                    if (!locations.ContainsKey(copy.Dest))
                    {
                        readyQueue.Enqueue(copy.Dest);
                    }
                }

                while (pendingQueue.TryDequeue(out Register current))
                {
                    Register copyDest;
                    Register origSource;
                    Register copySource;

                    while (readyQueue.TryDequeue(out copyDest))
                    {
                        origSource = sources[copyDest];
                        copySource = locations[origSource];

                        OperandType type = types[copyDest];

                        EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));

                        locations[origSource] = copyDest;

                        // The original source register was just read for the last pending
                        // time from its own location, so if it is also a destination,
                        // it can be written now.
                        if (origSource == copySource && sources.ContainsKey(origSource))
                        {
                            readyQueue.Enqueue(origSource);
                        }
                    }

                    copyDest = current;
                    origSource = sources[copyDest];
                    copySource = locations[origSource];

                    // When both are equal, the value is already in place
                    // (for example, it was handled by the ready queue above).
                    if (copyDest != copySource)
                    {
                        OperandType type = types[copyDest];

                        // The swap is always done with the widest type of the register class.
                        type = type.IsInteger() ? OperandType.I64 : OperandType.V128;

                        EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));

                        locations[origSource] = copyDest;

                        Register swapOther = copySource;

                        if (copyDest != locations[sources[copySource]])
                        {
                            // Find the other swap destination register.
                            // To do that, we search all the pending registers, and pick
                            // the one where the copy source register is equal to the
                            // current destination register being processed (copyDest).
                            foreach (Register pending in pendingQueue)
                            {
                                // Is this a copy of pending <- copyDest?
                                if (copyDest == locations[sources[pending]])
                                {
                                    swapOther = pending;

                                    break;
                                }
                            }
                        }

                        // The value that was previously at "copyDest" now lives on
                        // "copySource" thanks to the swap, now we need to update the
                        // location for the next copy that is supposed to copy the value
                        // that used to live on "copyDest".
                        locations[sources[swapOther]] = copySource;
                    }
                }
            }

            private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
            {
                sequence.Add(new Operation(Instruction.Copy, x, y));
            }

            /// <summary>Emits the three exclusive-or operations that swap x and y.</summary>
            private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
            {
                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y));
                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, y, y, x));
                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y));
            }
        }

        // The queues are created once, in the constructor.
        private readonly Queue<Operation> _fillQueue;
        private readonly Queue<Operation> _spillQueue;

        private readonly ParallelCopy _parallelCopy;

        /// <summary>True once at least one fill, spill or copy was added.</summary>
        public bool HasCopy { get; private set; }

        public CopyResolver()
        {
            _fillQueue = new Queue<Operation>();
            _spillQueue = new Queue<Operation>();

            _parallelCopy = new ParallelCopy();
        }

        /// <summary>
        /// Records the move needed to go from the location of <paramref name="left"/>
        /// to the location of <paramref name="right"/>. Nothing is recorded when both
        /// intervals already share the same register or the same spill offset.
        /// </summary>
        /// <param name="left">Interval holding the current location of the variable</param>
        /// <param name="right">Interval holding the new location of the variable</param>
        /// <exception cref="ArgumentException">
        /// The intervals belong to different variables, or both are spilled at different offsets
        /// </exception>
        public void AddSplit(LiveInterval left, LiveInterval right)
        {
            if (left.Local != right.Local)
            {
                throw new ArgumentException("Intervals of different variables are not allowed.");
            }

            OperandType type = left.Local.Type;

            if (left.IsSpilled && !right.IsSpilled)
            {
                // Move from the stack to a register.
                AddSplitFill(left, right, type);
            }
            else if (!left.IsSpilled && right.IsSpilled)
            {
                // Move from a register to the stack.
                AddSplitSpill(left, right, type);
            }
            else if (!left.IsSpilled && !right.IsSpilled)
            {
                if (left.Register != right.Register)
                {
                    // Move from one register to another.
                    AddSplitCopy(left, right, type);
                }

                // Same register on both sides, nothing to move. The spill offsets
                // are not meaningful here, so they must not be compared.
            }
            else if (left.SpillOffset != right.SpillOffset)
            {
                // This would be the stack-to-stack move case, but this is not supported.
                throw new ArgumentException("Both intervals were spilled.");
            }
        }

        private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
        {
            Operand register = GetRegister(right.Register, type);

            Operand offset = new Operand(left.SpillOffset);

            _fillQueue.Enqueue(new Operation(Instruction.Fill, register, offset));

            HasCopy = true;
        }

        private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
        {
            Operand offset = new Operand(right.SpillOffset);

            Operand register = GetRegister(left.Register, type);

            _spillQueue.Enqueue(new Operation(Instruction.Spill, null, offset, register));

            HasCopy = true;
        }

        private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
        {
            _parallelCopy.AddCopy(right.Register, left.Register, type);

            HasCopy = true;
        }

        /// <summary>
        /// Builds the final sequence: all spills first, then the register to register
        /// copies, then all fills. The spill and fill queues are emptied by this call.
        /// </summary>
        /// <returns>Operations to be inserted, in execution order</returns>
        public Operation[] Sequence()
        {
            List<Operation> sequence = new List<Operation>();

            while (_spillQueue.TryDequeue(out Operation spillOp))
            {
                sequence.Add(spillOp);
            }

            _parallelCopy.Sequence(sequence);

            while (_fillQueue.TryDequeue(out Operation fillOp))
            {
                sequence.Add(fillOp);
            }

            return sequence.ToArray();
        }

        private static Operand GetRegister(Register reg, OperandType type)
        {
            return new Operand(reg.Index, reg.Type, type);
        }
    }
}
// File: ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs (new file in this commit)
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using System.Diagnostics;

using static ARMeilleure.IntermediateRepresentation.OperandHelper;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Register allocator that assigns registers to local variables that are only
    /// referenced inside a single block, and keeps all other local variables on the
    /// stack, loading and storing them through temporary registers around each node.
    /// </summary>
    class HybridAllocator : IRegisterAllocator
    {
        // Amount of registers reserved per block (for each register class) as spill temporaries.
        private const int MaxIROperands = 4;

        /// <summary>Information gathered for each block before the allocation itself.</summary>
        private struct BlockInfo
        {
            public bool HasCall { get; }

            public int IntFixedRegisters { get; }
            public int VecFixedRegisters { get; }

            public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters)
            {
                HasCall = hasCall;
                IntFixedRegisters = intFixedRegisters;
                VecFixedRegisters = vecFixedRegisters;
            }
        }

        /// <summary>Allocation state of a single local variable.</summary>
        private class LocalInfo
        {
            // Total amount of references expected, and amount of references seen so far.
            public int Uses { get; set; }
            public int UseCount { get; set; }

            public bool PreAllocated { get; set; }
            public int Register { get; set; }
            public int SpillOffset { get; set; }

            // Sequence number of the node for which "Temp" was selected.
            public int Sequence { get; set; }

            public Operand Temp { get; set; }

            public OperandType Type { get; }

            // Lowest and highest index of the blocks referencing the variable.
            private int _first;
            private int _last;

            public bool IsBlockLocal => _first == _last;

            public LocalInfo(OperandType type, int uses)
            {
                Uses = uses;
                Type = type;

                _first = -1;
                _last = -1;
            }

            public void SetBlockIndex(int blkIndex)
            {
                if (_first == -1 || blkIndex < _first)
                {
                    _first = blkIndex;
                }

                if (_last == -1 || blkIndex > _last)
                {
                    _last = blkIndex;
                }
            }
        }

        /// <summary>
        /// Replaces all local variable operands of <paramref name="cfg"/> with registers,
        /// inserting fill and spill operations for variables kept on the stack.
        /// </summary>
        /// <param name="cfg">Control flow graph to be modified in place</param>
        /// <param name="stackAlloc">Allocator used to reserve stack space for spilled variables</param>
        /// <param name="regMasks">Masks of available and caller saved registers</param>
        /// <returns>Masks of the registers used, and the total size reported by the stack allocator</returns>
        public AllocationResult RunPass(
            ControlFlowGraph cfg,
            StackAllocator stackAlloc,
            RegisterMasks regMasks)
        {
            int intUsedRegisters = 0;
            int vecUsedRegisters = 0;

            int intFreeRegisters = regMasks.IntAvailableRegisters;
            int vecFreeRegisters = regMasks.VecAvailableRegisters;

            List<LocalInfo> locInfo = new List<LocalInfo>();

            BlockInfo[] blockInfo = CollectBlockInfo(cfg, locInfo);

            int sequence = 0;

            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
            {
                BasicBlock block = cfg.PostOrderBlocks[index];

                BlockInfo blkInfo = blockInfo[block.Index];

                int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
                int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;

                int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
                int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;

                // Caller saved registers are preferred as spill temporaries on blocks with calls,
                // since they are not handed out to local variables on those blocks anyway.
                int intSpillTempRegisters = SelectSpillTemps(
                    intCallerSavedRegisters & ~blkInfo.IntFixedRegisters,
                    intLocalFreeRegisters);
                int vecSpillTempRegisters = SelectSpillTemps(
                    vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters,
                    vecLocalFreeRegisters);

                intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
                vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);

                for (LinkedListNode<Node> llNode = block.Operations.First; llNode != null; llNode = llNode.Next)
                {
                    Node node = llNode.Value;

                    int intLocalUse = 0;
                    int vecLocalUse = 0;

                    for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
                    {
                        Operand source = node.GetSource(srcIndex);

                        if (source.Kind != OperandKind.LocalVariable)
                        {
                            continue;
                        }

                        LocalInfo info = locInfo[source.AsInt32() - 1];

                        info.UseCount++;

                        Debug.Assert(info.UseCount <= info.Uses);

                        if (info.Register != -1)
                        {
                            node.SetSource(srcIndex, Register(info.Register, source.Type.ToRegisterType(), source.Type));

                            // This was the last reference, the register can be reused from now on.
                            if (info.UseCount == info.Uses && !info.PreAllocated)
                            {
                                if (source.Type.IsInteger())
                                {
                                    intLocalFreeRegisters |= 1 << info.Register;
                                }
                                else
                                {
                                    vecLocalFreeRegisters |= 1 << info.Register;
                                }
                            }
                        }
                        else
                        {
                            Operand temp = info.Temp;

                            // Select a new temporary, unless one was already selected for this same node.
                            if (temp == null || info.Sequence != sequence)
                            {
                                temp = source.Type.IsInteger()
                                    ? GetSpillTemp(source, intSpillTempRegisters, ref intLocalUse)
                                    : GetSpillTemp(source, vecSpillTempRegisters, ref vecLocalUse);

                                info.Sequence = sequence;
                                info.Temp = temp;
                            }

                            node.SetSource(srcIndex, temp);

                            Operation fillOp = new Operation(Instruction.Fill, temp, Const(info.SpillOffset));

                            block.Operations.AddBefore(llNode, fillOp);
                        }
                    }

                    int intLocalAsg = 0;
                    int vecLocalAsg = 0;

                    for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
                    {
                        Operand dest = node.GetDestination(dstIndex);

                        if (dest.Kind != OperandKind.LocalVariable)
                        {
                            continue;
                        }

                        LocalInfo info = locInfo[dest.AsInt32() - 1];

                        // First reference to the variable, decide where it is going to live.
                        if (info.UseCount == 0 && !info.PreAllocated)
                        {
                            int mask = dest.Type.IsInteger()
                                ? intLocalFreeRegisters
                                : vecLocalFreeRegisters;

                            if (info.IsBlockLocal && mask != 0)
                            {
                                int selectedReg = BitUtils.LowestBitSet(mask);

                                info.Register = selectedReg;

                                if (dest.Type.IsInteger())
                                {
                                    intLocalFreeRegisters &= ~(1 << selectedReg);
                                    intUsedRegisters |= 1 << selectedReg;
                                }
                                else
                                {
                                    vecLocalFreeRegisters &= ~(1 << selectedReg);
                                    vecUsedRegisters |= 1 << selectedReg;
                                }
                            }
                            else
                            {
                                info.Register = -1;
                                info.SpillOffset = stackAlloc.Allocate(dest.Type.GetSizeInBytes());
                            }
                        }

                        info.UseCount++;

                        Debug.Assert(info.UseCount <= info.Uses);

                        if (info.Register != -1)
                        {
                            node.SetDestination(dstIndex, Register(info.Register, dest.Type.ToRegisterType(), dest.Type));
                        }
                        else
                        {
                            Operand temp = info.Temp;

                            if (temp == null || info.Sequence != sequence)
                            {
                                temp = dest.Type.IsInteger()
                                    ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
                                    : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);

                                info.Sequence = sequence;
                                info.Temp = temp;
                            }

                            node.SetDestination(dstIndex, temp);

                            Operation spillOp = new Operation(Instruction.Spill, null, Const(info.SpillOffset), temp);

                            // Continue from the spill operation, so that it is not visited by the loop.
                            llNode = block.Operations.AddAfter(llNode, spillOp);
                        }
                    }

                    sequence++;

                    intUsedRegisters |= intLocalAsg | intLocalUse;
                    vecUsedRegisters |= vecLocalAsg | vecLocalUse;
                }
            }

            return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
        }

        /// <summary>
        /// First pass: numbers the local variables, records the blocks where each one
        /// is referenced, and records for each block whether it has a call and which
        /// registers are written directly by its nodes.
        /// </summary>
        /// <param name="cfg">Control flow graph to be scanned</param>
        /// <param name="locInfo">List that receives one entry per local variable found</param>
        /// <returns>Block information, indexed by block index</returns>
        private static BlockInfo[] CollectBlockInfo(ControlFlowGraph cfg, List<LocalInfo> locInfo)
        {
            BlockInfo[] blockInfo = new BlockInfo[cfg.Blocks.Count];

            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
            {
                BasicBlock block = cfg.PostOrderBlocks[index];

                int intFixedRegisters = 0;
                int vecFixedRegisters = 0;

                bool hasCall = false;

                foreach (Node node in block.Operations)
                {
                    if (node is Operation operation && operation.Instruction == Instruction.Call)
                    {
                        hasCall = true;
                    }

                    for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
                    {
                        Operand source = node.GetSource(srcIndex);

                        if (source.Kind == OperandKind.LocalVariable)
                        {
                            locInfo[source.AsInt32() - 1].SetBlockIndex(block.Index);
                        }
                    }

                    for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
                    {
                        Operand dest = node.GetDestination(dstIndex);

                        if (dest.Kind == OperandKind.LocalVariable)
                        {
                            LocalInfo info;

                            // A value of zero means that the local was not numbered yet.
                            // Numbers start at 1, and map to the list index plus one.
                            if (dest.Value != 0)
                            {
                                info = locInfo[dest.AsInt32() - 1];
                            }
                            else
                            {
                                dest.NumberLocal(locInfo.Count + 1);

                                info = new LocalInfo(dest.Type, UsesCount(dest));

                                locInfo.Add(info);
                            }

                            info.SetBlockIndex(block.Index);
                        }
                        else if (dest.Kind == OperandKind.Register)
                        {
                            if (dest.Type.IsInteger())
                            {
                                intFixedRegisters |= 1 << dest.GetRegister().Index;
                            }
                            else
                            {
                                vecFixedRegisters |= 1 << dest.GetRegister().Index;
                            }
                        }
                    }
                }

                blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
            }

            return blockInfo;
        }

        /// <summary>
        /// Selects up to <see cref="MaxIROperands"/> registers, taking the lowest set bits
        /// of <paramref name="mask0"/> first, and then the lowest set bits of <paramref name="mask1"/>.
        /// </summary>
        /// <returns>Mask with the selected registers</returns>
        private static int SelectSpillTemps(int mask0, int mask1)
        {
            int selection = 0;
            int count = 0;

            while (count < MaxIROperands && mask0 != 0)
            {
                // Isolates the lowest set bit.
                int mask = mask0 & -mask0;

                selection |= mask;

                mask0 &= ~mask;

                count++;
            }

            while (count < MaxIROperands && mask1 != 0)
            {
                int mask = mask1 & -mask1;

                selection |= mask;

                mask1 &= ~mask;

                count++;
            }

            Debug.Assert(count == MaxIROperands, "No enough registers for spill temps.");

            return selection;
        }

        /// <summary>
        /// Picks the lowest register of <paramref name="freeMask"/> that is not yet on
        /// <paramref name="useMask"/>, and marks it as used on <paramref name="useMask"/>.
        /// </summary>
        private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
        {
            int selectedReg = BitUtils.LowestBitSet(freeMask & ~useMask);

            useMask |= 1 << selectedReg;

            return Register(selectedReg, local.Type.ToRegisterType(), local.Type);
        }

        /// <summary>Total amount of assignments and uses of the local variable.</summary>
        private static int UsesCount(Operand local)
        {
            return local.Assignments.Count + local.Uses.Count;
        }
    }
}
// ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
using ARMeilleure.Translation;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Common entry point shared by the register allocation passes.
    /// </summary>
    interface IRegisterAllocator
    {
        /// <summary>
        /// Runs the allocation pass over a control flow graph.
        /// </summary>
        /// <param name="cfg">Control flow graph the pass operates on</param>
        /// <param name="stackAlloc">Stack allocator handed to the pass</param>
        /// <param name="regMasks">Register masks handed to the pass</param>
        /// <returns>The result of the allocation</returns>
        AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks);
    }
}
// ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    // Based on:
    // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
    // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
    class LinearScanAllocator : IRegisterAllocator
    {
        // Every operation owns InstructionGap consecutive positions.
        // Sources are used at the even position and destinations are
        // defined at the following odd position (see BuildIntervals).
        private const int InstructionGap     = 2;
        private const int InstructionGapMask = InstructionGap - 1;

        // Number of registers of each type. The first RegistersCount * 2
        // entries of _intervals are the fixed register intervals
        // (see NumberLocals and GetRegisterId).
        private const int RegistersCount = 16;

        // Start position of every block.
        private HashSet<int> _blockEdges;

        // Position range of every block, indexed by block index.
        private LiveRange[] _blockRanges;

        // Live-in set of every block, indexed by block index.
        private BitMap[] _blockLiveIn;

        // Fixed intervals followed by the local intervals. Split children
        // are inserted here (ordered by start position) while allocating.
        private List<LiveInterval> _intervals;

        // Snapshot of _intervals taken before any split happens,
        // indexed by the id used on the liveness bit maps.
        private LiveInterval[] _parentIntervals;

        // Maps (position / InstructionGap) to the operation list node.
        private List<LinkedListNode<Node>> _operationNodes;

        private int _operationsCount;

        /// <summary>
        /// Mutable state shared by the allocation of all intervals.
        /// </summary>
        private class AllocationContext
        {
            public RegisterMasks Masks { get; }

            public StackAllocator StackAlloc { get; }

            public BitMap Active   { get; }
            public BitMap Inactive { get; }

            public int IntUsedRegisters { get; set; }
            public int VecUsedRegisters { get; set; }

            public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount)
            {
                StackAlloc = stackAlloc;
                Masks      = masks;

                Active   = new BitMap(intervalsCount);
                Inactive = new BitMap(intervalsCount);
            }

            public void MoveActiveToInactive(int bit)
            {
                Move(Active, Inactive, bit);
            }

            public void MoveInactiveToActive(int bit)
            {
                Move(Inactive, Active, bit);
            }

            /// <summary>
            /// Records the register on the used registers mask of its type.
            /// Any register that is not an integer register is recorded on
            /// the vector mask.
            /// </summary>
            public void MarkRegisterUsed(Register register)
            {
                int bit = 1 << register.Index;

                if (register.Type == RegisterType.Integer)
                {
                    IntUsedRegisters |= bit;
                }
                else
                {
                    VecUsedRegisters |= bit;
                }
            }

            private static void Move(BitMap source, BitMap dest, int bit)
            {
                source.Clear(bit);

                dest.Set(bit);
            }
        }

        /// <summary>
        /// Numbers the locals, builds their live intervals, assigns a register
        /// or a stack slot to every interval, rewrites the operations to use
        /// the assigned registers and inserts the copies required by the splits.
        /// </summary>
        /// <param name="cfg">Control flow graph to allocate registers for</param>
        /// <param name="stackAlloc">Allocator used to reserve the spill slots</param>
        /// <param name="regMasks">Available and caller saved register masks</param>
        /// <returns>Used register masks and the total stack size</returns>
        public AllocationResult RunPass(
            ControlFlowGraph cfg,
            StackAllocator   stackAlloc,
            RegisterMasks    regMasks)
        {
            NumberLocals(cfg);

            AllocationContext context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);

            BuildIntervals(cfg, context);

            // Note: _intervals may grow inside the loop, split children
            // are inserted on the list by InsertInterval.
            for (int index = 0; index < _intervals.Count; index++)
            {
                LiveInterval current = _intervals[index];

                if (current.IsEmpty)
                {
                    continue;
                }

                if (current.IsFixed)
                {
                    context.Active.Set(index);

                    context.MarkRegisterUsed(current.Register);

                    continue;
                }

                AllocateInterval(context, current, index);
            }

            // Skip the fixed register intervals, they have no local to replace.
            for (int index = RegistersCount * 2; index < _intervals.Count; index++)
            {
                if (!_intervals[index].IsSpilled)
                {
                    ReplaceLocalWithRegister(_intervals[index]);
                }
            }

            InsertSplitCopies();
            InsertSplitCopiesAtEdges(cfg);

            return new AllocationResult(
                context.IntUsedRegisters,
                context.VecUsedRegisters,
                context.StackAlloc.TotalSize);
        }

        /// <summary>
        /// Updates the active and inactive sets for the start position of the
        /// current interval, and then assigns a register to it, spilling
        /// other intervals when there is no free register.
        /// </summary>
        private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
        {
            // NOTE(review): the bit maps are modified while being enumerated here,
            // this relies on the BitMap enumerator tolerating it - confirm.

            // Check active intervals that already ended.
            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];

                if (interval.GetEnd() < current.GetStart())
                {
                    context.Active.Clear(iIndex);
                }
                else if (!interval.Overlaps(current.GetStart()))
                {
                    context.MoveActiveToInactive(iIndex);
                }
            }

            // Check inactive intervals that already ended or were reactivated.
            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];

                if (interval.GetEnd() < current.GetStart())
                {
                    context.Inactive.Clear(iIndex);
                }
                else if (interval.Overlaps(current.GetStart()))
                {
                    context.MoveInactiveToActive(iIndex);
                }
            }

            if (!TryAllocateRegWithoutSpill(context, current, cIndex))
            {
                AllocateRegWithSpill(context, current, cIndex);
            }
        }

        /// <summary>
        /// Tries to assign to the current interval the register that stays free
        /// for the longest time. The interval is split when the register is only
        /// free for part of its lifetime.
        /// </summary>
        /// <returns>False if no register is free at the start of the interval</returns>
        private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex)
        {
            RegisterType regType = current.Local.Type.ToRegisterType();

            int availableRegisters = context.Masks.GetAvailableRegisters(regType);

            // Registers that are not available keep a free position of 0.
            int[] freePositions = new int[RegistersCount];

            for (int index = 0; index < RegistersCount; index++)
            {
                if ((availableRegisters & (1 << index)) != 0)
                {
                    freePositions[index] = int.MaxValue;
                }
            }

            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];

                if (interval.Register.Type == regType)
                {
                    freePositions[interval.Register.Index] = 0;
                }
            }

            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];

                if (interval.Register.Type == regType)
                {
                    int overlapPosition = interval.GetOverlapPosition(current);

                    if (overlapPosition != LiveInterval.NotFound && freePositions[interval.Register.Index] > overlapPosition)
                    {
                        freePositions[interval.Register.Index] = overlapPosition;
                    }
                }
            }

            int selectedReg = GetHighestValueIndex(freePositions);

            int selectedNextUse = freePositions[selectedReg];

            // Intervals start and end at odd positions, unless they span an entire
            // block, in this case they will have ranges at an even position.
            // When an interval is loaded from the stack to a register, we can only
            // do the split at an odd position, because otherwise the split interval
            // that is inserted on the list to be processed may clobber a register
            // used by the instruction at the same position as the split.
            // The problem only happens when an interval ends exactly at this instruction,
            // because otherwise they would interfere, and the register wouldn't be selected.
            // When the interval is aligned and the above happens, there's no problem as
            // the instruction that actually has the last use is the one
            // before that position.
            selectedNextUse &= ~InstructionGapMask;

            if (selectedNextUse <= current.GetStart())
            {
                return false;
            }
            else if (selectedNextUse < current.GetEnd())
            {
                Debug.Assert(selectedNextUse > current.GetStart(), "Trying to split interval at the start.");

                LiveInterval splitChild = current.Split(selectedNextUse);

                if (splitChild.UsesCount != 0)
                {
                    Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");

                    InsertInterval(splitChild);
                }
                else
                {
                    Spill(context, splitChild);
                }
            }

            current.Register = new Register(selectedReg, regType);

            context.MarkRegisterUsed(current.Register);

            context.Active.Set(cIndex);

            return true;
        }

        /// <summary>
        /// Assigns a register to the current interval when none is free,
        /// by spilling either the current interval or the intervals that
        /// are using the selected register.
        /// </summary>
        private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex)
        {
            RegisterType regType = current.Local.Type.ToRegisterType();

            int availableRegisters = context.Masks.GetAvailableRegisters(regType);

            // Registers that are not available keep a position of 0 on both arrays.
            int[] usePositions     = new int[RegistersCount];
            int[] blockedPositions = new int[RegistersCount];

            for (int index = 0; index < RegistersCount; index++)
            {
                if ((availableRegisters & (1 << index)) != 0)
                {
                    usePositions[index] = int.MaxValue;

                    blockedPositions[index] = int.MaxValue;
                }
            }

            void SetUsePosition(int index, int position)
            {
                usePositions[index] = Math.Min(usePositions[index], position);
            }

            void SetBlockedPosition(int index, int position)
            {
                blockedPositions[index] = Math.Min(blockedPositions[index], position);

                SetUsePosition(index, position);
            }

            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];

                if (!interval.IsFixed && interval.Register.Type == regType)
                {
                    int nextUse = interval.NextUseAfter(current.GetStart());

                    if (nextUse != -1)
                    {
                        SetUsePosition(interval.Register.Index, nextUse);
                    }
                }
            }

            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];

                if (!interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current))
                {
                    int nextUse = interval.NextUseAfter(current.GetStart());

                    if (nextUse != -1)
                    {
                        SetUsePosition(interval.Register.Index, nextUse);
                    }
                }
            }

            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];

                if (interval.IsFixed && interval.Register.Type == regType)
                {
                    SetBlockedPosition(interval.Register.Index, 0);
                }
            }

            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];

                if (interval.IsFixed && interval.Register.Type == regType)
                {
                    int overlapPosition = interval.GetOverlapPosition(current);

                    if (overlapPosition != LiveInterval.NotFound)
                    {
                        SetBlockedPosition(interval.Register.Index, overlapPosition);
                    }
                }
            }

            int selectedReg = GetHighestValueIndex(usePositions);

            int currentFirstUse = current.FirstUse();

            Debug.Assert(currentFirstUse >= 0, "Current interval has no uses.");

            if (usePositions[selectedReg] < currentFirstUse)
            {
                // All intervals on inactive and active are being used before current,
                // so spill the current interval.
                Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill a interval currently being used.");

                LiveInterval splitChild = current.Split(currentFirstUse);

                Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");

                InsertInterval(splitChild);

                Spill(context, current);
            }
            else if (blockedPositions[selectedReg] > current.GetEnd())
            {
                // Spill made the register available for the entire current lifetime,
                // so we only need to split the intervals using the selected register.
                current.Register = new Register(selectedReg, regType);

                SplitAndSpillOverlappingIntervals(context, current);

                context.Active.Set(cIndex);
            }
            else
            {
                // There are conflicts even after spill due to the use of fixed registers
                // that can't be spilled, so we need to also split current at the point of
                // the first fixed register use.
                current.Register = new Register(selectedReg, regType);

                int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask;

                Debug.Assert(splitPosition > current.GetStart(), "Trying to split a interval at a invalid position.");

                LiveInterval splitChild = current.Split(splitPosition);

                if (splitChild.UsesCount != 0)
                {
                    Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");

                    InsertInterval(splitChild);
                }
                else
                {
                    Spill(context, splitChild);
                }

                SplitAndSpillOverlappingIntervals(context, current);

                context.Active.Set(cIndex);
            }
        }

        /// <summary>
        /// Gets the index of the highest value on the array.
        /// The lowest index is returned when the highest value is repeated.
        /// </summary>
        private static int GetHighestValueIndex(int[] array)
        {
            int highest = array[0];

            if (highest == int.MaxValue)
            {
                return 0;
            }

            int selected = 0;

            for (int index = 1; index < array.Length; index++)
            {
                int current = array[index];

                if (highest < current)
                {
                    highest  = current;
                    selected = index;

                    // Nothing can be higher than this, stop looking.
                    if (current == int.MaxValue)
                    {
                        break;
                    }
                }
            }

            return selected;
        }

        /// <summary>
        /// Splits and spills all the non-fixed intervals that are using
        /// the register that was assigned to the current interval.
        /// </summary>
        private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current)
        {
            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];

                if (!interval.IsFixed && interval.Register == current.Register)
                {
                    SplitAndSpillOverlappingInterval(context, current, interval);

                    context.Active.Clear(iIndex);
                }
            }

            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];

                if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
                {
                    SplitAndSpillOverlappingInterval(context, current, interval);

                    context.Inactive.Clear(iIndex);
                }
            }
        }

        private void SplitAndSpillOverlappingInterval(
            AllocationContext context,
            LiveInterval      current,
            LiveInterval      interval)
        {
            // If there's a next use after the start of the current interval,
            // we need to split the spilled interval twice, and re-insert it
            // on the "pending" list to ensure that it will get a new register
            // on that use position.
            int nextUse = interval.NextUseAfter(current.GetStart());

            LiveInterval splitChild;

            if (interval.GetStart() < current.GetStart())
            {
                splitChild = interval.Split(current.GetStart());
            }
            else
            {
                splitChild = interval;
            }

            if (nextUse != -1)
            {
                Debug.Assert(nextUse > current.GetStart(), "Trying to spill a interval currently being used.");

                if (nextUse > splitChild.GetStart())
                {
                    LiveInterval right = splitChild.Split(nextUse);

                    Spill(context, splitChild);

                    splitChild = right;
                }

                InsertInterval(splitChild);
            }
            else
            {
                Spill(context, splitChild);
            }
        }

        /// <summary>
        /// Inserts an interval on the list of intervals to be processed,
        /// keeping the non-fixed part of the list ordered.
        /// </summary>
        private void InsertInterval(LiveInterval interval)
        {
            Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses.");
            Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval.");
            Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");

            // The fixed intervals at the start of the list are not part of the search.
            int startIndex = RegistersCount * 2;

            int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);

            if (insertIndex < 0)
            {
                insertIndex = ~insertIndex;
            }

            _intervals.Insert(insertIndex, interval);
        }

        private void Spill(AllocationContext context, LiveInterval interval)
        {
            Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval.");
            Debug.Assert(interval.UsesCount == 0, "Trying to spill a interval with uses.");

            // We first check if any of the siblings were spilled, if so we can reuse
            // the stack offset. Otherwise, we allocate a new space on the stack.
            // This prevents stack-to-stack copies being necessary for a split interval.
            if (!interval.TrySpillWithSiblingOffset())
            {
                interval.Spill(context.StackAlloc.Allocate(interval.Local.Type));
            }
        }

        /// <summary>
        /// Inserts the copies between adjacent split children of an interval,
        /// for the splits that are not located at the start of a block.
        /// </summary>
        private void InsertSplitCopies()
        {
            Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>();

            // Gets the resolver of a position, creating it only on the first request.
            CopyResolver GetCopyResolver(int position)
            {
                if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
                {
                    copyResolver = new CopyResolver();

                    copyResolvers.Add(position, copyResolver);
                }

                return copyResolver;
            }

            foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
            {
                LiveInterval previous = interval;

                foreach (LiveInterval splitChild in interval.SplitChilds())
                {
                    int splitPosition = splitChild.GetStart();

                    // Splits at block edges are handled by InsertSplitCopiesAtEdges.
                    if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
                    {
                        GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
                    }

                    previous = splitChild;
                }
            }

            foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
            {
                CopyResolver copyResolver = kv.Value;

                if (!copyResolver.HasCopy)
                {
                    continue;
                }

                int splitPosition = kv.Key;

                LinkedListNode<Node> node = GetOperationNode(splitPosition);

                Operation[] sequence = copyResolver.Sequence();

                // The sequence is inserted, in order, before the operation at the split position.
                node = node.List.AddBefore(node, sequence[0]);

                for (int index = 1; index < sequence.Length; index++)
                {
                    node = node.List.AddAfter(node, sequence[index]);
                }
            }
        }

        /// <summary>
        /// Inserts the copies required on the control flow edges where a
        /// split interval is on a different location on each side of the edge.
        /// </summary>
        private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg)
        {
            int blocksCount = cfg.Blocks.Count;

            // Blocks created by the edge splits below have an index
            // past the original block count.
            bool IsSplitEdgeBlock(BasicBlock block)
            {
                return block.Index >= blocksCount;
            }

            for (LinkedListNode<BasicBlock> node = cfg.Blocks.First; node != null; node = node.Next)
            {
                BasicBlock block = node.Value;

                if (IsSplitEdgeBlock(block))
                {
                    continue;
                }

                bool hasSingleOrNoSuccessor = block.Next == null || block.Branch == null;

                foreach (BasicBlock successor in Successors(block))
                {
                    int succIndex = successor.Index;

                    // If the current node is a split node, then the actual successor node
                    // (the successor before the split) should be right after it.
                    if (IsSplitEdgeBlock(successor))
                    {
                        succIndex = Successors(successor).First().Index;
                    }

                    // Both positions are the same for all intervals on this edge.
                    int lEnd   = _blockRanges[block.Index].End - 1;
                    int rStart = _blockRanges[succIndex].Start;

                    CopyResolver copyResolver = new CopyResolver();

                    foreach (int iIndex in _blockLiveIn[succIndex])
                    {
                        LiveInterval interval = _parentIntervals[iIndex];

                        if (!interval.IsSplit)
                        {
                            continue;
                        }

                        LiveInterval left  = interval.GetSplitChild(lEnd);
                        LiveInterval right = interval.GetSplitChild(rStart);

                        if (left != null && right != null && left != right)
                        {
                            copyResolver.AddSplit(left, right);
                        }
                    }

                    if (!copyResolver.HasCopy)
                    {
                        continue;
                    }

                    Operation[] sequence = copyResolver.Sequence();

                    if (hasSingleOrNoSuccessor)
                    {
                        foreach (Operation operation in sequence)
                        {
                            block.Append(operation);
                        }
                    }
                    else if (successor.Predecessors.Count == 1)
                    {
                        LinkedListNode<Node> prependNode = successor.Operations.AddFirst(sequence[0]);

                        for (int index = 1; index < sequence.Length; index++)
                        {
                            Operation operation = sequence[index];

                            prependNode = successor.Operations.AddAfter(prependNode, operation);
                        }
                    }
                    else
                    {
                        // Split the critical edge.
                        BasicBlock splitBlock = cfg.SplitEdge(block, successor);

                        foreach (Operation operation in sequence)
                        {
                            splitBlock.Append(operation);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Replaces the local of the interval with the assigned register,
        /// on all operations at the use positions of the interval.
        /// </summary>
        private void ReplaceLocalWithRegister(LiveInterval current)
        {
            Operand register = GetRegister(current);

            foreach (int usePosition in current.UsePositions())
            {
                Node operation = GetOperationNode(usePosition).Value;

                for (int index = 0; index < operation.SourcesCount; index++)
                {
                    Operand source = operation.GetSource(index);

                    if (source == current.Local)
                    {
                        operation.SetSource(index, register);
                    }
                }

                for (int index = 0; index < operation.DestinationsCount; index++)
                {
                    Operand dest = operation.GetDestination(index);

                    if (dest == current.Local)
                    {
                        operation.SetDestination(index, register);
                    }
                }
            }
        }

        private static Operand GetRegister(LiveInterval interval)
        {
            Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");

            return new Operand(
                interval.Register.Index,
                interval.Register.Type,
                interval.Local.Type);
        }

        private LinkedListNode<Node> GetOperationNode(int position)
        {
            return _operationNodes[position / InstructionGap];
        }

        /// <summary>
        /// Creates the fixed register intervals, assigns a number and an interval
        /// to every local variable used as destination, and builds the list that
        /// maps positions to operations.
        /// </summary>
        private void NumberLocals(ControlFlowGraph cfg)
        {
            _operationNodes = new List<LinkedListNode<Node>>();

            _intervals = new List<LiveInterval>();

            // The insertion order here must match GetRegisterId.
            for (int index = 0; index < RegistersCount; index++)
            {
                _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
                _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
            }

            HashSet<Operand> visited = new HashSet<Operand>();

            _operationsCount = 0;

            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
            {
                BasicBlock block = cfg.PostOrderBlocks[index];

                for (LinkedListNode<Node> node = block.Operations.First; node != null; node = node.Next)
                {
                    _operationNodes.Add(node);

                    Node operation = node.Value;

                    foreach (Operand dest in Destinations(operation))
                    {
                        if (dest.Kind == OperandKind.LocalVariable && visited.Add(dest))
                        {
                            dest.NumberLocal(_intervals.Count);

                            _intervals.Add(new LiveInterval(dest));
                        }
                    }
                }

                _operationsCount += block.Operations.Count * InstructionGap;

                if (block.Operations.Count == 0)
                {
                    // Pretend we have a dummy instruction on the empty block.
                    _operationNodes.Add(null);

                    _operationsCount += InstructionGap;
                }
            }

            _parentIntervals = _intervals.ToArray();
        }

        /// <summary>
        /// Computes the live sets of all blocks, and uses them to build
        /// the ranges and use positions of all intervals.
        /// </summary>
        private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context)
        {
            _blockRanges = new LiveRange[cfg.Blocks.Count];

            int mapSize = _intervals.Count;

            BitMap[] blkLiveGen  = new BitMap[cfg.Blocks.Count];
            BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count];

            // Compute local live sets.
            foreach (BasicBlock block in cfg.Blocks)
            {
                BitMap liveGen  = new BitMap(mapSize);
                BitMap liveKill = new BitMap(mapSize);

                foreach (Node node in block.Operations)
                {
                    foreach (Operand source in Sources(node))
                    {
                        int id = GetOperandId(source);

                        if (!liveKill.IsSet(id))
                        {
                            liveGen.Set(id);
                        }
                    }

                    foreach (Operand dest in Destinations(node))
                    {
                        liveKill.Set(GetOperandId(dest));
                    }
                }

                blkLiveGen [block.Index] = liveGen;
                blkLiveKill[block.Index] = liveKill;
            }

            // Compute global live sets.
            BitMap[] blkLiveIn  = new BitMap[cfg.Blocks.Count];
            BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count];

            for (int index = 0; index < cfg.Blocks.Count; index++)
            {
                blkLiveIn [index] = new BitMap(mapSize);
                blkLiveOut[index] = new BitMap(mapSize);
            }

            bool modified;

            // Iterate until no live-out set changes.
            do
            {
                modified = false;

                for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
                {
                    BasicBlock block = cfg.PostOrderBlocks[index];

                    BitMap liveOut = blkLiveOut[block.Index];

                    foreach (BasicBlock successor in Successors(block))
                    {
                        if (liveOut.Set(blkLiveIn[successor.Index]))
                        {
                            modified = true;
                        }
                    }

                    BitMap liveIn = blkLiveIn[block.Index];

                    liveIn.Set  (liveOut);
                    liveIn.Clear(blkLiveKill[block.Index]);
                    liveIn.Set  (blkLiveGen [block.Index]);
                }
            }
            while (modified);

            _blockLiveIn = blkLiveIn;

            _blockEdges = new HashSet<int>();

            // Compute lifetime intervals.
            // Positions are assigned backwards, starting from the total count.
            int operationPos = _operationsCount;

            for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
            {
                BasicBlock block = cfg.PostOrderBlocks[index];

                // We handle empty blocks by pretending they have a dummy instruction,
                // because otherwise the block would have the same start and end position,
                // and this is not valid.
                int instCount = Math.Max(block.Operations.Count, 1);

                int blockStart = operationPos - instCount * InstructionGap;
                int blockEnd   = operationPos;

                _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd);

                _blockEdges.Add(blockStart);

                BitMap liveOut = blkLiveOut[block.Index];

                foreach (int id in liveOut)
                {
                    _intervals[id].AddRange(blockStart, blockEnd);
                }

                if (block.Operations.Count == 0)
                {
                    operationPos -= InstructionGap;

                    continue;
                }

                foreach (Node node in BottomOperations(block))
                {
                    operationPos -= InstructionGap;

                    foreach (Operand dest in Destinations(node))
                    {
                        LiveInterval interval = _intervals[GetOperandId(dest)];

                        interval.SetStart(operationPos + 1);
                        interval.AddUsePosition(operationPos + 1);
                    }

                    foreach (Operand source in Sources(node))
                    {
                        LiveInterval interval = _intervals[GetOperandId(source)];

                        interval.AddRange(blockStart, operationPos + 1);
                        interval.AddUsePosition(operationPos);
                    }

                    if (node is Operation operation && operation.Instruction == Instruction.Call)
                    {
                        AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
                        AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
                    }
                }
            }
        }

        /// <summary>
        /// Adds a range at the given operation to the fixed interval
        /// of every register on the mask.
        /// </summary>
        private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
        {
            while (mask != 0)
            {
                int regIndex = BitUtils.LowestBitSet(mask);

                Register callerSavedReg = new Register(regIndex, regType);

                LiveInterval interval = _intervals[GetRegisterId(callerSavedReg)];

                interval.AddRange(operationPos + 1, operationPos + InstructionGap);

                mask &= ~(1 << regIndex);
            }
        }

        private static int GetOperandId(Operand operand)
        {
            if (operand.Kind == OperandKind.LocalVariable)
            {
                return operand.AsInt32();
            }
            else if (operand.Kind == OperandKind.Register)
            {
                return GetRegisterId(operand.GetRegister());
            }
            else
            {
                throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
            }
        }

        private static int GetRegisterId(Register register)
        {
            // Integer registers use the even ids, and vector registers the odd ones.
            return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0);
        }

        private static IEnumerable<BasicBlock> Successors(BasicBlock block)
        {
            if (block.Next != null)
            {
                yield return block.Next;
            }

            if (block.Branch != null)
            {
                yield return block.Branch;
            }
        }

        /// <summary>
        /// Enumerates the operations of the block from last to first,
        /// stopping at the first phi node found.
        /// </summary>
        private static IEnumerable<Node> BottomOperations(BasicBlock block)
        {
            LinkedListNode<Node> node = block.Operations.Last;

            while (node != null && !(node.Value is PhiNode))
            {
                yield return node.Value;

                node = node.Previous;
            }
        }

        private static IEnumerable<Operand> Destinations(Node node)
        {
            for (int index = 0; index < node.DestinationsCount; index++)
            {
                yield return node.GetDestination(index);
            }
        }

        /// <summary>
        /// Enumerates the sources of the node that are local variables or registers.
        /// </summary>
        private static IEnumerable<Operand> Sources(Node node)
        {
            for (int index = 0; index < node.SourcesCount; index++)
            {
                Operand source = node.GetSource(index);

                if (IsLocalOrRegister(source.Kind))
                {
                    yield return source;
                }
            }
        }

        private static bool IsLocalOrRegister(OperandKind kind)
        {
            return kind == OperandKind.LocalVariable ||
                   kind == OperandKind.Register;
        }
    }
}
// ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Sorted set of live ranges and use positions of a local variable or of a
    /// fixed register, along with the register or spill offset assigned to it.
    /// </summary>
    class LiveInterval : IComparable<LiveInterval>
    {
        public const int NotFound = -1;

        // Interval that owns the list of split children.
        // An interval that was not created by a split is its own parent.
        private LiveInterval _parent;

        private SortedSet<int> _usePositions = new SortedSet<int>();

        private List<LiveRange> _ranges = new List<LiveRange>();

        // Split children keyed by start position. Only used on the parent interval.
        private SortedList<int, LiveInterval> _childs = new SortedList<int, LiveInterval>();

        public int UsesCount => _usePositions.Count;

        public bool IsSplit => _childs.Count != 0;

        public Operand Local { get; }

        public Register Register { get; set; }

        // A value of -1 means that the interval was not spilled.
        public int SpillOffset { get; private set; } = -1;

        public bool IsSpilled => SpillOffset != -1;
        public bool IsFixed   { get; }

        public bool IsEmpty => _ranges.Count == 0;

        /// <summary>
        /// Creates a new interval for a local variable.
        /// </summary>
        /// <param name="local">Local variable of the interval</param>
        /// <param name="parent">Interval being split, or null for an interval that is not a split child</param>
        public LiveInterval(Operand local = null, LiveInterval parent = null)
        {
            Local   = local;
            _parent = parent ?? this;
        }

        /// <summary>
        /// Creates a new fixed interval for a register.
        /// </summary>
        /// <param name="register">Register of the interval</param>
        public LiveInterval(Register register) : this()
        {
            IsFixed  = true;
            Register = register;
        }

        /// <summary>
        /// Moves the start of the first range, or creates a range
        /// with a length of 1 if the interval is empty.
        /// </summary>
        public void SetStart(int position)
        {
            if (_ranges.Count == 0)
            {
                _ranges.Add(new LiveRange(position, position + 1));

                return;
            }

            int firstEnd = _ranges[0].End;

            Debug.Assert(position != firstEnd);

            _ranges[0] = new LiveRange(position, firstEnd);
        }

        public int GetStart()
        {
            if (IsEmpty)
            {
                throw new InvalidOperationException("Empty interval.");
            }

            return _ranges[0].Start;
        }

        /// <summary>
        /// Moves the end of the last range, or creates a range
        /// with a length of 1 if the interval is empty.
        /// </summary>
        public void SetEnd(int position)
        {
            if (_ranges.Count == 0)
            {
                _ranges.Add(new LiveRange(position, position + 1));

                return;
            }

            int last = _ranges.Count - 1;

            int lastStart = _ranges[last].Start;

            Debug.Assert(position != lastStart);

            _ranges[last] = new LiveRange(lastStart, position);
        }

        public int GetEnd()
        {
            if (IsEmpty)
            {
                throw new InvalidOperationException("Empty interval.");
            }

            return _ranges[_ranges.Count - 1].End;
        }

        /// <summary>
        /// Adds a range to the interval, merging it with the existing ranges
        /// that it is found to intersect.
        /// </summary>
        /// <exception cref="ArgumentException">The range start is not lower than its end</exception>
        public void AddRange(int start, int end)
        {
            if (start >= end)
            {
                throw new ArgumentException($"Invalid range start position {start}, {end}");
            }

            int found = _ranges.BinarySearch(new LiveRange(start, end));

            if (found < 0)
            {
                // Nothing matches the new range, the complement is the insertion point.
                InsertRange(~found, start, end);

                return;
            }

            // The new range matches an existing range on the search, so all the
            // neighbouring ranges that touch it are replaced by a single range
            // that covers all of them.
            int first = found;
            int last  = found;

            while (first > 0 && _ranges[first - 1].End >= start)
            {
                first--;
            }

            while (last + 1 < _ranges.Count && _ranges[last + 1].Start <= end)
            {
                last++;
            }

            start = Math.Min(start, _ranges[first].Start);
            end   = Math.Max(end,   _ranges[last].End);

            _ranges.RemoveRange(first, (last - first) + 1);

            InsertRange(first, start, end);
        }

        /// <summary>
        /// Inserts a range at the given index, extending the previous and/or
        /// the next range instead when the new range is adjacent to them.
        /// </summary>
        private void InsertRange(int index, int start, int end)
        {
            bool touchesPrevious = index > 0             && _ranges[index - 1].End == start;
            bool touchesNext     = index < _ranges.Count && _ranges[index].Start   == end;

            if (touchesPrevious)
            {
                start = _ranges[index - 1].Start;
            }

            if (touchesNext)
            {
                end = _ranges[index].End;
            }

            if (touchesPrevious)
            {
                // The previous range is the one that is extended. If the next
                // range is also adjacent, it is now covered and can be removed.
                if (touchesNext)
                {
                    _ranges.RemoveAt(index);
                }

                _ranges[index - 1] = new LiveRange(start, end);
            }
            else if (touchesNext)
            {
                _ranges[index] = new LiveRange(start, end);
            }
            else
            {
                _ranges.Insert(index, new LiveRange(start, end));
            }
        }

        public void AddUsePosition(int position)
        {
            _usePositions.Add(position);
        }

        public bool Overlaps(int position) => _ranges.BinarySearch(new LiveRange(position, position + 1)) >= 0;

        public bool Overlaps(LiveInterval other)
        {
            foreach (LiveRange otherRange in other._ranges)
            {
                if (_ranges.BinarySearch(otherRange) >= 0)
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Gets the start of the range of this interval that overlaps
        /// the other interval first.
        /// </summary>
        /// <returns>The range start position, or <see cref="NotFound"/> if there is no overlap</returns>
        public int GetOverlapPosition(LiveInterval other)
        {
            foreach (LiveRange otherRange in other._ranges)
            {
                int match = _ranges.BinarySearch(otherRange);

                if (match < 0)
                {
                    continue;
                }

                // More than one range may overlap, and the search can return
                // any of them, so go back to the one with the lowest start.
                while (match > 0 && _ranges[match - 1].End > otherRange.Start)
                {
                    match--;
                }

                return _ranges[match].Start;
            }

            return NotFound;
        }

        public IEnumerable<LiveInterval> SplitChilds() => _childs.Values;

        public IEnumerable<int> UsePositions() => _usePositions;

        /// <summary>
        /// Gets the lowest use position.
        /// </summary>
        /// <returns>The use position, or <see cref="NotFound"/> if the interval has no uses</returns>
        public int FirstUse()
        {
            return _usePositions.Count == 0 ? NotFound : _usePositions.Min;
        }

        /// <summary>
        /// Gets the lowest use position that is greater than or equal to the given position.
        /// </summary>
        /// <returns>The use position, or <see cref="NotFound"/> if there is none</returns>
        public int NextUseAfter(int position)
        {
            foreach (int use in _usePositions)
            {
                if (use < position)
                {
                    continue;
                }

                return use;
            }

            return NotFound;
        }

        /// <summary>
        /// Splits the interval at the given position. This interval keeps what
        /// is before the position, the rest is moved to a new interval that is
        /// registered as a split child of the parent.
        /// </summary>
        /// <returns>The new interval, starting at the split position or after it</returns>
        public LiveInterval Split(int position)
        {
            LiveInterval right = new LiveInterval(Local, _parent);

            // Find the first range that goes to the right side, cutting
            // in two the range that contains the position, if any.
            int moveFrom = 0;

            while (moveFrom < _ranges.Count)
            {
                LiveRange range = _ranges[moveFrom];

                if (position > range.Start && position <= range.End)
                {
                    right._ranges.Add(new LiveRange(position, range.End));

                    _ranges[moveFrom] = new LiveRange(range.Start, position);

                    moveFrom++;

                    break;
                }

                if (range.Start >= position)
                {
                    break;
                }

                moveFrom++;
            }

            int moveCount = _ranges.Count - moveFrom;

            if (moveCount > 0)
            {
                right._ranges.AddRange(_ranges.GetRange(moveFrom, moveCount));

                _ranges.RemoveRange(moveFrom, moveCount);
            }

            // The uses at or after the split position belong to the right side.
            right._usePositions.UnionWith(_usePositions.Where(use => use >= position));

            _usePositions.RemoveWhere(use => use >= position);

            Debug.Assert(_ranges.Count != 0, "Left interval is empty after split.");

            Debug.Assert(right._ranges.Count != 0, "Right interval is empty after split.");

            AddSplitChild(right);

            return right;
        }

        private void AddSplitChild(LiveInterval child)
        {
            Debug.Assert(!child.IsEmpty, "Trying to insert a empty interval.");

            _parent._childs.Add(child.GetStart(), child);
        }

        /// <summary>
        /// Gets the interval that overlaps the given position, looking
        /// at this interval first, and then at its split children.
        /// </summary>
        /// <returns>The interval, or null if none overlaps the position</returns>
        public LiveInterval GetSplitChild(int position)
        {
            return Overlaps(position)
                ? this
                : _childs.Values.FirstOrDefault(child => child.Overlaps(position));
        }

        /// <summary>
        /// Spills the interval to the stack offset of the first split child
        /// of the parent that was already spilled.
        /// </summary>
        /// <returns>True if a spilled split child was found, false otherwise</returns>
        public bool TrySpillWithSiblingOffset()
        {
            LiveInterval spilledSibling = _parent._childs.Values.FirstOrDefault(child => child.IsSpilled);

            if (spilledSibling == null)
            {
                return false;
            }

            Spill(spilledSibling.SpillOffset);

            return true;
        }

        public void Spill(int offset)
        {
            SpillOffset = offset;
        }

        /// <summary>
        /// Orders the intervals by start position. When at least one of
        /// the intervals is empty, they are ordered by range count instead.
        /// </summary>
        public int CompareTo(LiveInterval other)
        {
            bool anyEmpty = _ranges.Count == 0 || other._ranges.Count == 0;

            return anyEmpty
                ? _ranges.Count.CompareTo(other._ranges.Count)
                : _ranges[0].Start.CompareTo(other._ranges[0].Start);
        }

        public override string ToString() => string.Join("; ", _ranges);
    }
}
// ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
using System;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Span of positions delimited by <see cref="Start"/> and <see cref="End"/>.
    /// The comparison below treats <see cref="End"/> as exclusive, which matches
    /// the "[Start, End[" notation produced by <see cref="ToString"/>.
    /// </summary>
    struct LiveRange : IComparable<LiveRange>
    {
        /// <summary>First position covered by the range.</summary>
        public int Start { get; }

        /// <summary>Position at which the range stops (not covered itself).</summary>
        public int End { get; }

        /// <summary>
        /// Creates a range from the two supplied positions. No validation is performed.
        /// </summary>
        /// <param name="start">Value stored in <see cref="Start"/>.</param>
        /// <param name="end">Value stored in <see cref="End"/>.</param>
        public LiveRange(int start, int end)
        {
            Start = start;
            End   = end;
        }

        /// <summary>
        /// Compares two ranges, treating any two overlapping ranges as equal.
        /// </summary>
        /// <param name="other">Range to compare against.</param>
        /// <returns>
        /// 0 when the ranges share at least one position; otherwise the result of
        /// comparing the two <see cref="Start"/> values.
        /// </returns>
        /// <remarks>
        /// Because overlap counts as equality, a binary search over a sorted list of
        /// disjoint ranges finds a range that overlaps the probe, which is how
        /// LiveInterval uses it.
        /// </remarks>
        public int CompareTo(LiveRange other)
        {
            bool intersects = other.Start < End && Start < other.End;

            return intersects ? 0 : Start.CompareTo(other.Start);
        }

        /// <summary>Formats the range as "[Start, End[".</summary>
        public override string ToString() => $"[{Start}, {End}[";
    }
}
// ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
using ARMeilleure.IntermediateRepresentation;
using System;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Immutable group of six integer values describing the available, caller saved
    /// and callee saved registers, once for integer and once for vector registers.
    /// </summary>
    /// <remarks>
    /// NOTE(review): each value is presumably a bit mask with one bit per register
    /// index; confirm against the code that builds and consumes these masks.
    /// </remarks>
    struct RegisterMasks
    {
        public int IntAvailableRegisters   { get; }
        public int VecAvailableRegisters   { get; }
        public int IntCallerSavedRegisters { get; }
        public int VecCallerSavedRegisters { get; }
        public int IntCalleeSavedRegisters { get; }
        public int VecCalleeSavedRegisters { get; }

        /// <summary>
        /// Stores every argument in the property of the same name.
        /// </summary>
        public RegisterMasks(
            int intAvailableRegisters,
            int vecAvailableRegisters,
            int intCallerSavedRegisters,
            int vecCallerSavedRegisters,
            int intCalleeSavedRegisters,
            int vecCalleeSavedRegisters)
        {
            // Integer register values.
            IntAvailableRegisters   = intAvailableRegisters;
            IntCallerSavedRegisters = intCallerSavedRegisters;
            IntCalleeSavedRegisters = intCalleeSavedRegisters;

            // Vector register values.
            VecAvailableRegisters   = vecAvailableRegisters;
            VecCallerSavedRegisters = vecCallerSavedRegisters;
            VecCalleeSavedRegisters = vecCalleeSavedRegisters;
        }

        /// <summary>
        /// Returns the "available registers" value for the requested register type.
        /// </summary>
        /// <param name="type">Register type to look up.</param>
        /// <returns>
        /// <see cref="IntAvailableRegisters"/> for integer registers,
        /// <see cref="VecAvailableRegisters"/> for vector registers.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="type"/> is neither integer nor vector.
        /// </exception>
        public int GetAvailableRegisters(RegisterType type)
        {
            if (type == RegisterType.Integer)
            {
                return IntAvailableRegisters;
            }

            if (type == RegisterType.Vector)
            {
                return VecAvailableRegisters;
            }

            throw new ArgumentException($"Invalid register type \"{type}\".");
        }
    }
}
// ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using System;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Hands out consecutive offsets from a running total that starts at zero.
    /// Each allocation begins where the previous one ended; no alignment or
    /// padding is applied and nothing is ever released.
    /// </summary>
    class StackAllocator
    {
        // Running total of all sizes requested so far; also the next offset handed out.
        private int _offset;

        /// <summary>Sum of the sizes of every allocation made so far.</summary>
        public int TotalSize => _offset;

        /// <summary>
        /// Reserves as many bytes as <c>type.GetSizeInBytes()</c> reports.
        /// </summary>
        /// <param name="type">Operand type whose size is reserved.</param>
        /// <returns>Offset at which the reserved region starts.</returns>
        public int Allocate(OperandType type) => Allocate(type.GetSizeInBytes());

        /// <summary>
        /// Reserves <paramref name="sizeInBytes"/> bytes.
        /// </summary>
        /// <param name="sizeInBytes">Number of bytes to reserve; not validated.</param>
        /// <returns>Offset at which the reserved region starts.</returns>
        public int Allocate(int sizeInBytes)
        {
            int allocated = _offset;

            // Advance the running total past the region just handed out.
            _offset = allocated + sizeInBytes;

            return allocated;
        }
    }
}
// ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
namespace ARMeilleure.CodeGen.Unwinding
{
    /// <summary>
    /// Immutable bundle of unwinding data: an array of push entries plus two sizes.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the sizes are presumably byte counts for the function prologue
    /// and for its fixed stack allocation; confirm against the code generator
    /// that fills this in.
    /// </remarks>
    struct UnwindInfo
    {
        /// <summary>Push entries supplied at construction (stored by reference, not copied).</summary>
        public UnwindPushEntry[] PushEntries { get; }

        /// <summary>Prologue size supplied at construction.</summary>
        public int PrologueSize { get; }

        /// <summary>Fixed allocation size supplied at construction.</summary>
        public int FixedAllocSize { get; }

        /// <summary>
        /// Stores every argument in the property of the same name.
        /// </summary>
        public UnwindInfo(UnwindPushEntry[] pushEntries, int prologueSize, int fixedAllocSize)
        {
            PrologueSize   = prologueSize;
            FixedAllocSize = fixedAllocSize;
            PushEntries    = pushEntries;
        }
    }
}
// ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
using ARMeilleure.IntermediateRepresentation;

namespace ARMeilleure.CodeGen.Unwinding
{
    /// <summary>
    /// Immutable record holding a register index, a register type and a stream end offset.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably describes one register push in a function prologue,
    /// with <see cref="StreamEndOffset"/> being the code stream position right after
    /// that push; confirm against the code generator.
    /// </remarks>
    struct UnwindPushEntry
    {
        /// <summary>Register index supplied at construction.</summary>
        public int Index { get; }

        /// <summary>Register type supplied at construction.</summary>
        public RegisterType Type { get; }

        /// <summary>Stream end offset supplied at construction.</summary>
        public int StreamEndOffset { get; }

        /// <summary>
        /// Stores every argument in the property of the same name.
        /// </summary>
        public UnwindPushEntry(int index, RegisterType type, int streamEndOffset)
        {
            StreamEndOffset = streamEndOffset;
            Type            = type;
            Index           = index;
        }
    }
}
\ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs new file mode 100644 index 00000000..c6483894 --- /dev/null +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -0,0 +1,1358 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Diagnostics; +using System.IO; + +namespace ARMeilleure.CodeGen.X86 +{ + class Assembler + { + private const int BadOp = 0; + private const int OpModRMBits = 24; + + private const byte RexPrefix = 0x40; + private const byte RexWPrefix = 0x48; + private const byte LockPrefix = 0xf0; + + [Flags] + private enum InstructionFlags + { + None = 0, + RegOnly = 1 << 0, + Reg8Src = 1 << 1, + Reg8Dest = 1 << 2, + RexW = 1 << 3, + Vex = 1 << 4, + + PrefixBit = 16, + PrefixMask = 3 << PrefixBit, + Prefix66 = 1 << PrefixBit, + PrefixF3 = 2 << PrefixBit, + PrefixF2 = 3 << PrefixBit + } + + private struct InstructionInfo + { + public int OpRMR { get; } + public int OpRMImm8 { get; } + public int OpRMImm32 { get; } + public int OpRImm64 { get; } + public int OpRRM { get; } + + public InstructionFlags Flags { get; } + + public InstructionInfo( + int opRMR, + int opRMImm8, + int opRMImm32, + int opRImm64, + int opRRM, + InstructionFlags flags) + { + OpRMR = opRMR; + OpRMImm8 = opRMImm8; + OpRMImm32 = opRMImm32; + OpRImm64 = opRImm64; + OpRRM = opRRM; + Flags = flags; + } + } + + private static InstructionInfo[] _instTable; + + private Stream _stream; + + static Assembler() + { + _instTable = new InstructionInfo[(int)X86Instruction.Count]; + + // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags + Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None)); + Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex)); + 
Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None)); + Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex)); + Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None)); + Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RegOnly)); + Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None)); + Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None)); + Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex)); + Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW)); + Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | 
InstructionFlags.Prefix66)); + Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex)); + Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly)); + Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex)); + Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex)); + Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None)); + Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | 
InstructionFlags.Prefix66)); + Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex)); + Add(X86Instruction.Divsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstructionFlags.None)); + Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None)); + Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None)); + Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None)); + Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex)); + Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + 
Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex)); + Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None)); + Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66)); + Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest)); + Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex)); + Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex)); + Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstructionFlags.None)); + Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None)); + 
Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src)); + Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None)); + Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src)); + Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None)); + Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex)); + Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None)); + Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, 
InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | 
InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, 
InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3)); + 
Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66)); 
+ Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None)); + Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex)); + Add(X86Instruction.Rcpss, new 
InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex)); + Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest)); + Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex)); + Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, 
InstructionFlags.Vex)); + Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None)); + Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex)); + Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Unpckhpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex)); + Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex)); + Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); + Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | 
InstructionFlags.Prefix66)); + Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); + } + + private static void Add(X86Instruction inst, InstructionInfo info) + { + _instTable[(int)inst] = info; + } + + public Assembler(Stream stream) + { + _stream = stream; + } + + public void Add(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Add); + } + + public void Addsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Addsd); + } + + public void Addss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Addss); + } + + public void And(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.And); + } + + public void Bsr(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Bsr); + } + + public void Bswap(Operand dest) + { + WriteInstruction(dest, null, dest.Type, X86Instruction.Bswap); + } + + public void Call(Operand dest) + { + WriteInstruction(dest, null, OperandType.None, X86Instruction.Call); + } + + public void Cdq() + { + WriteByte(0x99); + } + + public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition) + { + InstructionInfo info = _instTable[(int)X86Instruction.Cmovcc]; + + WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM | (int)condition, rrm: true); + } + + public void Cmp(Operand src1, Operand src2, OperandType type) + { + WriteInstruction(src1, src2, type, X86Instruction.Cmp); + } + + public void Cqo() + { + WriteByte(0x48); + WriteByte(0x99); + } + + public void Cmpxchg16b(MemoryOperand memOp) + { + WriteByte(LockPrefix); + + WriteInstruction(memOp, null, OperandType.None, X86Instruction.Cmpxchg16b); + } + + public void Comisd(Operand src1, Operand src2) + { + WriteInstruction(src1, null, src2, 
X86Instruction.Comisd); + } + + public void Comiss(Operand src1, Operand src2) + { + WriteInstruction(src1, null, src2, X86Instruction.Comiss); + } + + public void Cpuid() + { + WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid); + } + + public void Cvtsd2ss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss); + } + + public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type); + } + + public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type); + } + + public void Cvtss2sd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd); + } + + public void Div(Operand source) + { + WriteInstruction(null, source, source.Type, X86Instruction.Div); + } + + public void Divsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Divsd); + } + + public void Divss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Divss); + } + + public void Idiv(Operand source) + { + WriteInstruction(null, source, source.Type, X86Instruction.Idiv); + } + + public void Imul(Operand source) + { + WriteInstruction(null, source, source.Type, X86Instruction.Imul128); + } + + public void Imul(Operand dest, Operand source, OperandType type) + { + if (source.Kind != OperandKind.Register) + { + throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\"."); + } + + WriteInstruction(dest, source, type, X86Instruction.Imul); + } + + public void Imul(Operand dest, Operand src1, Operand src2, OperandType type) + { + InstructionInfo info = _instTable[(int)X86Instruction.Imul]; + + if (src2.Kind != OperandKind.Constant) + { + throw new ArgumentException($"Invalid source 2 operand kind 
\"{src2.Kind}\"."); + } + + if (IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, null, src1, type, info.Flags, info.OpRMImm8, rrm: true); + + WriteByte(src2.AsByte()); + } + else if (IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp) + { + WriteOpCode(dest, null, src1, type, info.Flags, info.OpRMImm32, rrm: true); + + WriteInt32(src2.AsInt32()); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}."); + } + } + + public void Insertps(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Insertps); + + WriteByte(imm); + } + + public void Jcc(X86Condition condition, long offset) + { + if (ConstFitsOnS8(offset)) + { + WriteByte((byte)(0x70 | (int)condition)); + + WriteByte((byte)offset); + } + else if (ConstFitsOnS32(offset)) + { + WriteByte(0x0f); + WriteByte((byte)(0x80 | (int)condition)); + + WriteInt32((int)offset); + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public void Jmp(long offset) + { + if (ConstFitsOnS8(offset)) + { + WriteByte(0xeb); + + WriteByte((byte)offset); + } + else if (ConstFitsOnS32(offset)) + { + WriteByte(0xe9); + + WriteInt32((int)offset); + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public void Lea(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Lea); + } + + public void Mov(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Mov); + } + + public void Mov16(Operand dest, Operand source) + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16); + } + + public void Mov8(Operand dest, Operand source) + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8); + } + + public void Movd(Operand dest, Operand source) + { + InstructionInfo info = _instTable[(int)X86Instruction.Movd]; + + if 
(source.Type.IsInteger() || source.Kind == OperandKind.Memory) + { + WriteOpCode(dest, null, source, OperandType.None, info.Flags, info.OpRRM, rrm: true); + } + else + { + WriteOpCode(dest, null, source, OperandType.None, info.Flags, info.OpRMR); + } + } + + public void Movdqu(Operand dest, Operand source) + { + WriteInstruction(dest, null, source, X86Instruction.Movdqu); + } + + public void Movhlps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movhlps); + } + + public void Movlhps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movlhps); + } + + public void Movq(Operand dest, Operand source) + { + InstructionInfo info = _instTable[(int)X86Instruction.Movd]; + + InstructionFlags flags = info.Flags | InstructionFlags.RexW; + + if (source.Type.IsInteger() || source.Kind == OperandKind.Memory) + { + WriteOpCode(dest, null, source, OperandType.None, flags, info.OpRRM, rrm: true); + } + else if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory) + { + WriteOpCode(dest, null, source, OperandType.None, flags, info.OpRMR); + } + else + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq); + } + } + + public void Movsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movsd); + } + + public void Movss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movss); + } + + public void Movsx16(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx16); + } + + public void Movsx32(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx32); + } + + public void Movsx8(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx8); + } + + public void Movzx16(Operand dest, Operand source, OperandType 
type) + { + WriteInstruction(dest, source, type, X86Instruction.Movzx16); + } + + public void Movzx8(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movzx8); + } + + public void Mul(Operand source) + { + WriteInstruction(null, source, source.Type, X86Instruction.Mul128); + } + + public void Mulsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Mulsd); + } + + public void Mulss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Mulss); + } + + public void Neg(Operand dest) + { + WriteInstruction(dest, null, dest.Type, X86Instruction.Neg); + } + + public void Not(Operand dest) + { + WriteInstruction(dest, null, dest.Type, X86Instruction.Not); + } + + public void Or(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Or); + } + + public void Pcmpeqw(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw); + } + + public void Pextrb(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pextrb); + + WriteByte(imm); + } + + public void Pextrd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pextrd); + + WriteByte(imm); + } + + public void Pextrq(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pextrq); + + WriteByte(imm); + } + + public void Pextrw(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pextrw); + + WriteByte(imm); + } + + public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrb); + + WriteByte(imm); + } + + public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrd); + 
+ WriteByte(imm); + } + + public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrq); + + WriteByte(imm); + } + + public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrw); + + WriteByte(imm); + } + + public void Pop(Operand dest) + { + if (dest.Kind == OperandKind.Register) + { + WriteCompactInst(dest, 0x58); + } + else + { + WriteInstruction(dest, null, dest.Type, X86Instruction.Pop); + } + } + + public void Popcnt(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Popcnt); + } + + public void Pshufd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, X86Instruction.Pshufd); + + WriteByte(imm); + } + + public void Push(Operand source) + { + if (source.Kind == OperandKind.Register) + { + WriteCompactInst(source, 0x50); + } + else + { + WriteInstruction(null, source, source.Type, X86Instruction.Push); + } + } + + public void Return() + { + WriteByte(0xc3); + } + + public void Ror(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Ror); + } + + public void Sar(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Sar); + } + + public void Shl(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Shl); + } + + public void Shr(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Shr); + } + + public void Setcc(Operand dest, X86Condition condition) + { + InstructionInfo info = _instTable[(int)X86Instruction.Setcc]; + + WriteOpCode(dest, null, null, OperandType.None, info.Flags, info.OpRRM | (int)condition); + } + + public void Sub(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, 
X86Instruction.Sub); + } + + public void Subsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Subsd); + } + + public void Subss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Subss); + } + + public void Test(Operand src1, Operand src2, OperandType type) + { + WriteInstruction(src1, src2, type, X86Instruction.Test); + } + + public void Xor(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Xor); + } + + public void Xorps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Xorps); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand source, + OperandType type = OperandType.None) + { + WriteInstruction(dest, null, source, inst, type); + } + + public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2) + { + if (src2.Kind == OperandKind.Constant) + { + WriteInstruction(src1, dest, src2, inst); + } + else + { + WriteInstruction(dest, src1, src2, inst); + } + } + + public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, null, source, inst); + + WriteByte(imm); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand src1, + Operand src2, + Operand src3) + { + // 3+ operands can only be encoded with the VEX encoding scheme. 
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding); + + WriteInstruction(dest, src1, src2, inst); + + WriteByte((byte)(src3.AsByte() << 4)); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand src1, + Operand src2, + byte imm) + { + WriteInstruction(dest, src1, src2, inst); + + WriteByte(imm); + } + + private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst) + { + if (source.Kind == OperandKind.Register) + { + X86Register shiftReg = (X86Register)source.GetRegister().Index; + + if (shiftReg != X86Register.Rcx) + { + throw new ArgumentException($"Invalid shift register \"{shiftReg}\"."); + } + + source = null; + } + + WriteInstruction(dest, source, type, inst); + } + + private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst) + { + InstructionInfo info = _instTable[(int)inst]; + + if (source != null) + { + if (source.Kind == OperandKind.Constant) + { + ulong imm = source.Value; + + if (inst == X86Instruction.Mov8) + { + WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else if (inst == X86Instruction.Mov16) + { + WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm32); + + WriteInt16((short)imm); + } + else if (IsImm8(imm, type) && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else if (IsImm32(imm, type) && info.OpRMImm32 != BadOp) + { + WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm32); + + WriteInt32((int)imm); + } + else if (dest != null && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp) + { + int rexPrefix = GetRexPrefix(dest, source, type, rrm: false); + + if (rexPrefix != 0) + { + WriteByte((byte)rexPrefix); + } + + WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111))); + + WriteUInt64(imm); + } + else + { + throw new ArgumentException($"Failed to encode constant 
0x{imm:X}."); + } + } + else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp) + { + WriteOpCode(dest, null, source, type, info.Flags, info.OpRMR); + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM, rrm: true); + } + else + { + throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\"."); + } + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM, rrm: true); + } + else if (info.OpRMR != BadOp) + { + WriteOpCode(dest, null, source, type, info.Flags, info.OpRMR); + } + else + { + throw new ArgumentNullException(nameof(source)); + } + } + + private void WriteInstruction( + Operand dest, + Operand src1, + Operand src2, + X86Instruction inst, + OperandType type = OperandType.None) + { + InstructionInfo info = _instTable[(int)inst]; + + if (src2 != null) + { + if (src2.Kind == OperandKind.Constant) + { + ulong imm = src2.Value; + + if ((byte)imm == imm && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, src1, null, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{imm:X}."); + } + } + else if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR); + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true); + } + else + { + throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\"."); + } + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true); + } + else if (info.OpRMR != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR); + } + else + { + throw new ArgumentNullException(nameof(src2)); + } + } + + private void WriteOpCode( + Operand dest, + Operand src1, + Operand src2, + OperandType type, + InstructionFlags flags, + int opCode, + bool rrm = 
false) + { + int rexPrefix = GetRexPrefix(dest, src2, type, rrm); + + if ((flags & InstructionFlags.RexW) != 0) + { + rexPrefix |= RexWPrefix; + } + + int modRM = (opCode >> OpModRMBits) << 3; + + MemoryOperand memOp = null; + + if (dest != null) + { + if (dest.Kind == OperandKind.Register) + { + int regIndex = dest.GetRegister().Index; + + modRM |= (regIndex & 0b111) << (rrm ? 3 : 0); + + if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4) + { + rexPrefix |= RexPrefix; + } + } + else if (dest.Kind == OperandKind.Memory) + { + memOp = dest as MemoryOperand; + } + else + { + throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\"."); + } + } + + if (src2 != null) + { + if (src2.Kind == OperandKind.Register) + { + int regIndex = src2.GetRegister().Index; + + modRM |= (regIndex & 0b111) << (rrm ? 0 : 3); + + if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4) + { + rexPrefix |= RexPrefix; + } + } + else if (src2.Kind == OperandKind.Memory && memOp == null) + { + memOp = src2 as MemoryOperand; + } + else + { + throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\"."); + } + } + + bool needsSibByte = false; + bool needsDisplacement = false; + + int sib = 0; + + if (memOp != null) + { + // Either source or destination is a memory operand. 
+ Register baseReg = memOp.BaseAddress.GetRegister(); + + X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111); + + needsSibByte = memOp.Index != null || baseRegLow == X86Register.Rsp; + needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp; + + if (needsDisplacement) + { + if (ConstFitsOnS8(memOp.Displacement)) + { + modRM |= 0x40; + } + else /* if (ConstFitsOnS32(memOp.Displacement)) */ + { + modRM |= 0x80; + } + } + + if (baseReg.Index >= 8) + { + rexPrefix |= RexPrefix | (baseReg.Index >> 3); + } + + if (needsSibByte) + { + sib = (int)baseRegLow; + + if (memOp.Index != null) + { + int indexReg = memOp.Index.GetRegister().Index; + + if (indexReg == (int)X86Register.Rsp) + { + throw new ArgumentException("Using RSP as index register on the memory operand is not allowed."); + } + + if (indexReg >= 8) + { + rexPrefix |= RexPrefix | (indexReg >> 3) << 1; + } + + sib |= (indexReg & 0b111) << 3; + } + else + { + sib |= 0b100 << 3; + } + + sib |= (int)memOp.Scale << 6; + + modRM |= 0b100; + } + else + { + modRM |= (int)baseRegLow; + } + } + else + { + // Source and destination are registers. + modRM |= 0xc0; + } + + Debug.Assert(opCode != BadOp, "Invalid opcode value."); + + if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) + { + int vexByte2 = (int)(flags & InstructionFlags.PrefixMask) >> (int)InstructionFlags.PrefixBit; + + if (src1 != null) + { + vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3; + } + else + { + vexByte2 |= 0b1111 << 3; + } + + ushort opCodeHigh = (ushort)(opCode >> 8); + + if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf) + { + // Two-byte form. + WriteByte(0xc5); + + vexByte2 |= (~rexPrefix & 4) << 5; + + WriteByte((byte)vexByte2); + } + else + { + // Three-byte form. 
+ WriteByte(0xc4); + + int vexByte1 = (~rexPrefix & 7) << 5; + + switch (opCodeHigh) + { + case 0xf: vexByte1 |= 1; break; + case 0xf38: vexByte1 |= 2; break; + case 0xf3a: vexByte1 |= 3; break; + + default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break; + } + + vexByte2 |= (rexPrefix & 8) << 4; + + WriteByte((byte)vexByte1); + WriteByte((byte)vexByte2); + } + + opCode &= 0xff; + } + else + { + switch (flags & InstructionFlags.PrefixMask) + { + case InstructionFlags.Prefix66: WriteByte(0x66); break; + case InstructionFlags.PrefixF2: WriteByte(0xf2); break; + case InstructionFlags.PrefixF3: WriteByte(0xf3); break; + } + + if (rexPrefix != 0) + { + WriteByte((byte)rexPrefix); + } + } + + if (dest != null && (flags & InstructionFlags.RegOnly) != 0) + { + opCode += dest.GetRegister().Index & 7; + } + + if ((opCode & 0xff0000) != 0) + { + WriteByte((byte)(opCode >> 16)); + } + + if ((opCode & 0xff00) != 0) + { + WriteByte((byte)(opCode >> 8)); + } + + WriteByte((byte)opCode); + + if ((flags & InstructionFlags.RegOnly) == 0) + { + WriteByte((byte)modRM); + + if (needsSibByte) + { + WriteByte((byte)sib); + } + + if (needsDisplacement) + { + if (ConstFitsOnS8(memOp.Displacement)) + { + WriteByte((byte)memOp.Displacement); + } + else /* if (ConstFitsOnS32(memOp.Displacement)) */ + { + WriteInt32(memOp.Displacement); + } + } + } + } + + private void WriteCompactInst(Operand operand, int opCode) + { + int regIndex = operand.GetRegister().Index; + + if (regIndex >= 8) + { + WriteByte(0x41); + } + + WriteByte((byte)(opCode + (regIndex & 0b111))); + } + + private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm) + { + int rexPrefix = 0; + + if (Is64Bits(type)) + { + rexPrefix = RexWPrefix; + } + + void SetRegisterHighBit(Register reg, int bit) + { + if (reg.Index >= 8) + { + rexPrefix |= RexPrefix | (reg.Index >> 3) << bit; + } + } + + if (dest != null && dest.Kind == OperandKind.Register) + { + 
SetRegisterHighBit(dest.GetRegister(), rrm ? 2 : 0); + } + + if (source != null && source.Kind == OperandKind.Register) + { + SetRegisterHighBit(source.GetRegister(), rrm ? 0 : 2); + } + + return rexPrefix; + } + + private static bool Is64Bits(OperandType type) + { + return type == OperandType.I64 || type == OperandType.FP64; + } + + private static bool IsImm8(ulong immediate, OperandType type) + { + long value = type == OperandType.I32 ? (int)immediate : (long)immediate; + + return ConstFitsOnS8(value); + } + + private static bool IsImm32(ulong immediate, OperandType type) + { + long value = type == OperandType.I32 ? (int)immediate : (long)immediate; + + return ConstFitsOnS32(value); + } + + public static int GetJccLength(long offset) + { + if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset)) + { + return 2; + } + else if (ConstFitsOnS32(offset < 0 ? offset - 6 : offset)) + { + return 6; + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public static int GetJmpLength(long offset) + { + if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset)) + { + return 2; + } + else if (ConstFitsOnS32(offset < 0 ? 
offset - 5 : offset)) + { + return 5; + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + private static bool ConstFitsOnS8(long value) + { + return value == (sbyte)value; + } + + private static bool ConstFitsOnS32(long value) + { + return value == (int)value; + } + + private void WriteInt16(short value) + { + WriteUInt16((ushort)value); + } + + private void WriteInt32(int value) + { + WriteUInt32((uint)value); + } + + private void WriteByte(byte value) + { + _stream.WriteByte(value); + } + + private void WriteUInt16(ushort value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + } + + private void WriteUInt32(uint value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + } + + private void WriteUInt64(ulong value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + _stream.WriteByte((byte)(value >> 32)); + _stream.WriteByte((byte)(value >> 40)); + _stream.WriteByte((byte)(value >> 48)); + _stream.WriteByte((byte)(value >> 56)); + } + } +}
// ARMeilleure/CodeGen/X86/CallConvName.cs
namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Names the calling conventions distinguished by the X86 code generator.
    /// </summary>
    enum CallConvName
    {
        /// <summary>The System V calling convention.</summary>
        SystemV = 0,

        /// <summary>The Windows calling convention.</summary>
        Windows = 1
    }
}
// ARMeilleure/CodeGen/X86/CallingConvention.cs
using System;
using System.Runtime.InteropServices;

namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Answers register-usage questions for the calling convention selected by
    /// <see cref="GetCurrentCallConv"/>. Register sets are returned as bit masks in
    /// which bit <c>(int)register</c> is set when that register belongs to the set.
    /// </summary>
    static class CallingConvention
    {
        // Mask with the low 16 bits set; one bit per register tracked by this class.
        private const int RegistersMask = 0xffff;

        // Integer registers reported as caller saved when the Windows convention is active.
        private const int WindowsIntCallerSaved =
            (1 << (int)X86Register.Rax) |
            (1 << (int)X86Register.Rcx) |
            (1 << (int)X86Register.Rdx) |
            (1 << (int)X86Register.R8)  |
            (1 << (int)X86Register.R9)  |
            (1 << (int)X86Register.R10) |
            (1 << (int)X86Register.R11);

        // Integer registers reported as caller saved when the System V convention is active.
        private const int SystemVIntCallerSaved =
            (1 << (int)X86Register.Rax) |
            (1 << (int)X86Register.Rcx) |
            (1 << (int)X86Register.Rdx) |
            (1 << (int)X86Register.Rsi) |
            (1 << (int)X86Register.Rdi) |
            (1 << (int)X86Register.R8)  |
            (1 << (int)X86Register.R9)  |
            (1 << (int)X86Register.R10) |
            (1 << (int)X86Register.R11);

        // Vector registers reported as caller saved when the Windows convention is active.
        private const int WindowsVecCallerSaved =
            (1 << (int)X86Register.Xmm0) |
            (1 << (int)X86Register.Xmm1) |
            (1 << (int)X86Register.Xmm2) |
            (1 << (int)X86Register.Xmm3) |
            (1 << (int)X86Register.Xmm4) |
            (1 << (int)X86Register.Xmm5);

        // Integer argument registers, in argument order, for the Windows convention.
        private static readonly X86Register[] _windowsIntArgRegs =
        {
            X86Register.Rcx,
            X86Register.Rdx,
            X86Register.R8,
            X86Register.R9
        };

        // Integer argument registers, in argument order, for the System V convention.
        private static readonly X86Register[] _systemVIntArgRegs =
        {
            X86Register.Rdi,
            X86Register.Rsi,
            X86Register.Rdx,
            X86Register.Rcx,
            X86Register.R8,
            X86Register.R9
        };

        /// <summary>
        /// Returns the mask of integer registers available for use: every bit of the
        /// register mask except the one for RSP.
        /// </summary>
        public static int GetIntAvailableRegisters()
        {
            int rspBit = 1 << (int)X86Register.Rsp;

            return RegistersMask & ~rspBit;
        }

        /// <summary>
        /// Returns the mask of vector registers available for use (the full register mask).
        /// </summary>
        public static int GetVecAvailableRegisters() => RegistersMask;

        /// <summary>
        /// Returns the mask of caller saved integer registers for the current calling convention.
        /// </summary>
        public static int GetIntCallerSavedRegisters()
        {
            bool isWindows = GetCurrentCallConv() == CallConvName.Windows;

            return isWindows ? WindowsIntCallerSaved : SystemVIntCallerSaved;
        }

        /// <summary>
        /// Returns the mask of caller saved vector registers for the current calling convention.
        /// For anything other than Windows, the full register mask is returned.
        /// </summary>
        public static int GetVecCallerSavedRegisters()
        {
            bool isWindows = GetCurrentCallConv() == CallConvName.Windows;

            return isWindows ? WindowsVecCallerSaved : RegistersMask;
        }

        /// <summary>
        /// Returns the mask of callee saved integer registers, computed by XOR-ing the
        /// caller saved integer mask with the full register mask.
        /// </summary>
        public static int GetIntCalleeSavedRegisters() => RegistersMask ^ GetIntCallerSavedRegisters();

        /// <summary>
        /// Returns the mask of callee saved vector registers, computed by XOR-ing the
        /// caller saved vector mask with the full register mask.
        /// </summary>
        public static int GetVecCalleeSavedRegisters() => RegistersMask ^ GetVecCallerSavedRegisters();

        /// <summary>
        /// Returns 4 regardless of the current calling convention.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this matches the number of Windows argument registers handled by
        /// <see cref="GetIntArgumentRegister"/> and <see cref="GetVecArgumentRegister"/>;
        /// presumably intended for Windows only - confirm against callers.
        /// </remarks>
        public static int GetArgumentsOnRegsCount() => 4;

        /// <summary>
        /// Returns 6 regardless of the current calling convention.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this matches the number of System V integer argument registers
        /// handled by <see cref="GetIntArgumentRegister"/> - confirm against callers.
        /// </remarks>
        public static int GetIntArgumentsOnRegsCount() => 6;

        /// <summary>
        /// Returns 8 regardless of the current calling convention.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this matches the number of System V vector argument registers
        /// accepted by <see cref="GetVecArgumentRegister"/> - confirm against callers.
        /// </remarks>
        public static int GetVecArgumentsOnRegsCount() => 8;

        /// <summary>
        /// Returns the integer register that carries the argument at position
        /// <paramref name="index"/> under the current calling convention.
        /// </summary>
        /// <param name="index">Zero-based argument position.</param>
        /// <returns>The register assigned to that position.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> has no register under the current calling convention.
        /// </exception>
        public static X86Register GetIntArgumentRegister(int index)
        {
            X86Register[] argRegs = GetCurrentCallConv() == CallConvName.Windows
                ? _windowsIntArgRegs
                : _systemVIntArgRegs;

            if (index < 0 || index >= argRegs.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            return argRegs[index];
        }

        /// <summary>
        /// Returns the vector register that carries the argument at position
        /// <paramref name="index"/>: XMM0 offset by the index, for up to 4 registers on
        /// Windows and up to 8 otherwise.
        /// </summary>
        /// <param name="index">Zero-based argument position.</param>
        /// <returns>The register assigned to that position.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not below the register count.
        /// </exception>
        public static X86Register GetVecArgumentRegister(int index)
        {
            int regCount = GetCurrentCallConv() == CallConvName.Windows ? 4 : 8;

            if (index < 0 || index >= regCount)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            return X86Register.Xmm0 + index;
        }

        /// <summary>Returns the register holding an integer return value (RAX).</summary>
        public static X86Register GetIntReturnRegister() => X86Register.Rax;

        /// <summary>
        /// Returns RDX.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably the high half of a return value wider than one
        /// register - confirm against callers.
        /// </remarks>
        public static X86Register GetIntReturnRegisterHigh() => X86Register.Rdx;

        /// <summary>Returns the register holding a vector return value (XMM0).</summary>
        public static X86Register GetVecReturnRegister() => X86Register.Xmm0;

        /// <summary>
        /// Selects the calling convention from the host OS: Windows when running on
        /// Windows, System V for everything else.
        /// </summary>
        public static CallConvName GetCurrentCallConv()
        {
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return CallConvName.Windows;
            }

            return CallConvName.SystemV;
        }
    }
}
// File: ARMeilleure/CodeGen/X86/CodeGenContext.cs
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;

namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Holds the state shared by the x86 instruction generators while a function is
    /// being emitted: the output stream and its assembler, the register allocation
    /// result, the stack region sizes, and the jumps between basic blocks whose final
    /// encoding is only decided by <see cref="GetCode"/>.
    /// </summary>
    class CodeGenContext
    {
        // Number of placeholder bytes written to the stream for each block jump.
        // The real jump instruction replaces them when GetCode builds the final code.
        private const int ReservedBytesForJump = 1;

        private readonly Stream _stream;

        /// <summary>Current length of the output stream, in bytes.</summary>
        public int StreamOffset => (int)_stream.Length;

        /// <summary>Register allocation result passed to the constructor.</summary>
        public AllocationResult AllocResult { get; }

        /// <summary>Assembler that writes into the output stream.</summary>
        public Assembler Assembler { get; }

        /// <summary>Block passed to the most recent <see cref="EnterBlock"/> call.</summary>
        public BasicBlock CurrBlock { get; private set; }

        /// <summary>
        /// Size in bytes of the stack region reserved for call arguments, including the
        /// padding that keeps the whole frame a multiple of 16 bytes.
        /// </summary>
        public int CallArgsRegionSize { get; }

        /// <summary>
        /// Size in bytes needed to save the callee saved vector registers that the
        /// function uses (16 bytes each).
        /// </summary>
        public int XmmSaveRegionSize { get; }

        // Stream position where each block starts, indexed by BasicBlock.Index.
        private readonly long[] _blockOffsets;

        /// <summary>
        /// A jump to a basic block, recorded while emitting and encoded by <see cref="GetCode"/>.
        /// </summary>
        private struct Jump
        {
            public bool IsConditional { get; }

            public X86Condition Condition { get; }

            public BasicBlock Target { get; }

            // Position of the placeholder bytes on the original stream.
            public long JumpPosition { get; }

            // Offset to encode on the instruction; updated on each relaxation pass.
            public long RelativeOffset { get; set; }

            // Size of the encoded jump instruction; 0 until first computed.
            public int InstSize { get; set; }

            /// <summary>Creates an unconditional jump.</summary>
            public Jump(BasicBlock target, long jumpPosition)
            {
                IsConditional = false;
                Condition     = 0;
                Target        = target;
                JumpPosition  = jumpPosition;

                RelativeOffset = 0;

                InstSize = 0;
            }

            /// <summary>Creates a conditional jump.</summary>
            public Jump(X86Condition condition, BasicBlock target, long jumpPosition)
            {
                IsConditional = true;
                Condition     = condition;
                Target        = target;
                JumpPosition  = jumpPosition;

                RelativeOffset = 0;

                InstSize = 0;
            }
        }

        // Recorded in emission order, so JumpPosition is increasing along the list.
        private readonly List<Jump> _jumps;

        // State of the single pending near jump (see JumpToNear / JumpHere).
        private X86Condition _jNearCondition;

        private long _jNearPosition;
        private int  _jNearLength;

        /// <summary>
        /// Creates a context that emits code into <paramref name="stream"/>.
        /// </summary>
        /// <param name="stream">Output stream; it is read back by <see cref="GetCode"/>.</param>
        /// <param name="allocResult">Result of the register allocation pass.</param>
        /// <param name="maxCallArgs">
        /// Largest argument count of any call on the function, or a negative value
        /// when the function has no calls.
        /// </param>
        /// <param name="blocksCount">Number of basic blocks on the function.</param>
        public CodeGenContext(Stream stream, AllocationResult allocResult, int maxCallArgs, int blocksCount)
        {
            _stream = stream;

            AllocResult = allocResult;

            Assembler = new Assembler(stream);

            CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize);
            XmmSaveRegionSize  = xmmSaveRegionSize;

            _blockOffsets = new long[blocksCount];

            _jumps = new List<Jump>();
        }

        /// <summary>
        /// Computes the size of the call arguments region, padded so that the complete
        /// frame (saved registers + spill region + call arguments) is 16 bytes aligned.
        /// </summary>
        /// <param name="allocResult">Result of the register allocation pass.</param>
        /// <param name="maxCallArgs">Largest call argument count, negative when there are no calls.</param>
        /// <param name="xmmSaveRegionSize">Receives the size of the vector register save region.</param>
        /// <returns>The size in bytes of the call arguments region.</returns>
        private static int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize)
        {
            // We need to add 8 bytes to the total size, as the call to this
            // function already pushed 8 bytes (the return address).
            int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
            int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;

            xmmSaveRegionSize = BitUtils.CountBits(vecMask) * 16;

            int calleeSaveRegionSize = BitUtils.CountBits(intMask) * 8 + xmmSaveRegionSize + 8;

            int argsCount = maxCallArgs;

            if (argsCount < 0)
            {
                // When the function has no calls, argsCount is -1.
                // In this case, we don't need to allocate the shadow space.
                argsCount = 0;
            }
            else if (argsCount < 4)
            {
                // The ABI mandates that the space for at least 4 arguments
                // is reserved on the stack (this is called shadow space).
                argsCount = 4;
            }

            int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize;

            // TODO: Instead of always multiplying by 16 (the largest possible size of a variable,
            // since a V128 has 16 bytes), we should calculate the exact size consumed by the
            // arguments passed to the called functions on the stack.
            int callArgsAndFrameSize = frameSize + argsCount * 16;

            // Ensure that the Stack Pointer will be aligned to 16 bytes.
            callArgsAndFrameSize = (callArgsAndFrameSize + 0xf) & ~0xf;

            return callArgsAndFrameSize - frameSize;
        }

        /// <summary>
        /// Marks the current stream position as the start of <paramref name="block"/>
        /// and makes it the current block.
        /// </summary>
        public void EnterBlock(BasicBlock block)
        {
            _blockOffsets[block.Index] = _stream.Position;

            CurrBlock = block;
        }

        /// <summary>
        /// Records an unconditional jump to <paramref name="target"/> at the current
        /// position and writes the placeholder bytes for it.
        /// </summary>
        public void JumpTo(BasicBlock target)
        {
            _jumps.Add(new Jump(target, _stream.Position));

            WritePadding(ReservedBytesForJump);
        }

        /// <summary>
        /// Records a conditional jump to <paramref name="target"/> at the current
        /// position and writes the placeholder bytes for it.
        /// </summary>
        public void JumpTo(X86Condition condition, BasicBlock target)
        {
            _jumps.Add(new Jump(condition, target, _stream.Position));

            WritePadding(ReservedBytesForJump);
        }

        /// <summary>
        /// Starts a short forward conditional jump whose target is set by the next
        /// <see cref="JumpHere"/> call. Only one such jump can be pending at a time,
        /// since a single set of fields tracks it.
        /// </summary>
        public void JumpToNear(X86Condition condition)
        {
            _jNearCondition = condition;
            _jNearPosition  = _stream.Position;
            _jNearLength    = Assembler.GetJccLength(0);

            // Skip the space of the jump instruction; JumpHere comes back to fill it.
            _stream.Seek(_jNearLength, SeekOrigin.Current);
        }

        /// <summary>
        /// Completes the jump started by <see cref="JumpToNear"/>, making it land on the
        /// current position. The stream position is restored afterwards.
        /// </summary>
        public void JumpHere()
        {
            long currentPosition = _stream.Position;

            _stream.Seek(_jNearPosition, SeekOrigin.Begin);

            // Relative to the end of the space reserved for the jump instruction.
            long offset = currentPosition - (_jNearPosition + _jNearLength);

            Debug.Assert(_jNearLength == Assembler.GetJccLength(offset), "Relative offset doesn't fit on near jump.");

            Assembler.Jcc(_jNearCondition, offset);

            _stream.Seek(currentPosition, SeekOrigin.Begin);
        }

        /// <summary>Writes <paramref name="size"/> zero bytes at the current position.</summary>
        private void WritePadding(int size)
        {
            while (size-- > 0)
            {
                _stream.WriteByte(0);
            }
        }

        /// <summary>
        /// Reads exactly <paramref name="count"/> bytes from the current position of
        /// <paramref name="stream"/>. A single <see cref="Stream.Read(byte[], int, int)"/>
        /// call is allowed to return fewer bytes than requested, so the read is repeated
        /// until the buffer is full.
        /// </summary>
        /// <exception cref="EndOfStreamException">The stream ended before all bytes were read.</exception>
        private static byte[] ReadBytes(Stream stream, long count)
        {
            byte[] buffer = new byte[count];

            int total = 0;

            while (total < buffer.Length)
            {
                int read = stream.Read(buffer, total, buffer.Length - total);

                if (read == 0)
                {
                    throw new EndOfStreamException();
                }

                total += read;
            }

            return buffer;
        }

        /// <summary>
        /// Builds the final machine code: decides the size and relative offset of every
        /// recorded block jump, then copies the emitted code replacing each jump
        /// placeholder with the encoded jump instruction.
        /// </summary>
        /// <returns>The final code bytes.</returns>
        /// <exception cref="EndOfStreamException">
        /// The output stream holds less data than the recorded positions require.
        /// </exception>
        public byte[] GetCode()
        {
            // Write jump relative offsets.
            // The size of a jump depends on its offset, and the offset depends on the size
            // of the jumps located between the jump and its target, so the computation is
            // repeated until a full pass leaves all offsets unchanged.
            bool modified;

            do
            {
                modified = false;

                for (int index = 0; index < _jumps.Count; index++)
                {
                    Jump jump = _jumps[index];

                    long jumpTarget = _blockOffsets[jump.Target.Index];

                    // Distance measured on the original stream, where each jump is a placeholder.
                    long offset = jumpTarget - jump.JumpPosition;

                    if (offset < 0)
                    {
                        // Backward jump: account for the growth of the earlier jumps
                        // located at or after the target.
                        for (int index2 = index - 1; index2 >= 0; index2--)
                        {
                            Jump jump2 = _jumps[index2];

                            if (jump2.JumpPosition < jumpTarget)
                            {
                                break;
                            }

                            offset -= jump2.InstSize - ReservedBytesForJump;
                        }
                    }
                    else
                    {
                        // Forward jump: account for the growth of the later jumps
                        // located before the target.
                        for (int index2 = index + 1; index2 < _jumps.Count; index2++)
                        {
                            Jump jump2 = _jumps[index2];

                            if (jump2.JumpPosition >= jumpTarget)
                            {
                                break;
                            }

                            offset += jump2.InstSize - ReservedBytesForJump;
                        }

                        // The offset was measured from the start of the placeholder,
                        // skip the placeholder of this jump.
                        offset -= ReservedBytesForJump;
                    }

                    if (jump.IsConditional)
                    {
                        jump.InstSize = Assembler.GetJccLength(offset);
                    }
                    else
                    {
                        jump.InstSize = Assembler.GetJmpLength(offset);
                    }

                    // The jump is relative to the next instruction, not the current one.
                    // Since we didn't know the next instruction address when calculating
                    // the offset (as the size of the current jump instruction was not known),
                    // we now need to compensate the offset with the jump instruction size.
                    // It's also worth noting that:
                    // - This is only needed for backward jumps.
                    // - GetJmpLength and GetJccLength also compensate the offset
                    // internally when computing the jump instruction size.
                    if (offset < 0)
                    {
                        offset -= jump.InstSize;
                    }

                    if (jump.RelativeOffset != offset)
                    {
                        modified = true;
                    }

                    jump.RelativeOffset = offset;

                    // Jump is a struct, store the modified copy back on the list.
                    _jumps[index] = jump;
                }
            }
            while (modified);

            // Write the code, ignoring the dummy bytes after jumps, into a new stream.
            _stream.Seek(0, SeekOrigin.Begin);

            using (MemoryStream codeStream = new MemoryStream())
            {
                Assembler assembler = new Assembler(codeStream);

                byte[] buffer;

                for (int index = 0; index < _jumps.Count; index++)
                {
                    Jump jump = _jumps[index];

                    // Copy the code located between the previous jump and this one.
                    buffer = ReadBytes(_stream, jump.JumpPosition - _stream.Position);

                    // Skip the placeholder, the real instruction is written instead.
                    _stream.Seek(ReservedBytesForJump, SeekOrigin.Current);

                    codeStream.Write(buffer);

                    if (jump.IsConditional)
                    {
                        assembler.Jcc(jump.Condition, jump.RelativeOffset);
                    }
                    else
                    {
                        assembler.Jmp(jump.RelativeOffset);
                    }
                }

                // Copy the code located after the last jump.
                buffer = ReadBytes(_stream, _stream.Length - _stream.Position);

                codeStream.Write(buffer);

                return codeStream.ToArray();
            }
        }
    }
}
\ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs new file mode 100644 index 00000000..ae24b563 --- /dev/null +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -0,0 +1,1661 @@ +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Common; +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; + +namespace ARMeilleure.CodeGen.X86 +{ + static class CodeGenerator + { + private const int PageSize = 0x1000; + private const int StackGuardSize = 0x2000; + + private static Action<CodeGenContext, Operation>[] _instTable; + + static CodeGenerator() + { + _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))]; + + Add(Instruction.Add, GenerateAdd); + Add(Instruction.BitwiseAnd, GenerateBitwiseAnd); + Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr); + Add(Instruction.BitwiseNot, GenerateBitwiseNot); + Add(Instruction.BitwiseOr, GenerateBitwiseOr); + Add(Instruction.Branch, GenerateBranch); + Add(Instruction.BranchIfFalse, GenerateBranchIfFalse); + Add(Instruction.BranchIfTrue, GenerateBranchIfTrue); + Add(Instruction.ByteSwap, GenerateByteSwap); + Add(Instruction.Call, GenerateCall); + Add(Instruction.Clobber, GenerateClobber); + Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128); + Add(Instruction.CompareEqual, GenerateCompareEqual); + Add(Instruction.CompareGreater, GenerateCompareGreater); + Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual); + Add(Instruction.CompareGreaterOrEqualUI, GenerateCompareGreaterOrEqualUI); + Add(Instruction.CompareGreaterUI, GenerateCompareGreaterUI); + Add(Instruction.CompareLess, GenerateCompareLess); + Add(Instruction.CompareLessOrEqual, 
GenerateCompareLessOrEqual); + Add(Instruction.CompareLessOrEqualUI, GenerateCompareLessOrEqualUI); + Add(Instruction.CompareLessUI, GenerateCompareLessUI); + Add(Instruction.CompareNotEqual, GenerateCompareNotEqual); + Add(Instruction.ConditionalSelect, GenerateConditionalSelect); + Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32); + Add(Instruction.ConvertToFP, GenerateConvertToFP); + Add(Instruction.Copy, GenerateCopy); + Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros); + Add(Instruction.CpuId, GenerateCpuId); + Add(Instruction.Divide, GenerateDivide); + Add(Instruction.DivideUI, GenerateDivideUI); + Add(Instruction.Fill, GenerateFill); + Add(Instruction.Load, GenerateLoad); + Add(Instruction.Load16, GenerateLoad16); + Add(Instruction.Load8, GenerateLoad8); + Add(Instruction.Multiply, GenerateMultiply); + Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI); + Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI); + Add(Instruction.Negate, GenerateNegate); + Add(Instruction.Return, GenerateReturn); + Add(Instruction.RotateRight, GenerateRotateRight); + Add(Instruction.ShiftLeft, GenerateShiftLeft); + Add(Instruction.ShiftRightSI, GenerateShiftRightSI); + Add(Instruction.ShiftRightUI, GenerateShiftRightUI); + Add(Instruction.SignExtend16, GenerateSignExtend16); + Add(Instruction.SignExtend32, GenerateSignExtend32); + Add(Instruction.SignExtend8, GenerateSignExtend8); + Add(Instruction.Spill, GenerateSpill); + Add(Instruction.SpillArg, GenerateSpillArg); + Add(Instruction.StackAlloc, GenerateStackAlloc); + Add(Instruction.Store, GenerateStore); + Add(Instruction.Store16, GenerateStore16); + Add(Instruction.Store8, GenerateStore8); + Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar); + Add(Instruction.VectorExtract, GenerateVectorExtract); + Add(Instruction.VectorExtract16, GenerateVectorExtract16); + Add(Instruction.VectorExtract8, GenerateVectorExtract8); + 
Add(Instruction.VectorInsert, GenerateVectorInsert); + Add(Instruction.VectorInsert16, GenerateVectorInsert16); + Add(Instruction.VectorInsert8, GenerateVectorInsert8); + Add(Instruction.VectorOne, GenerateVectorOne); + Add(Instruction.VectorZero, GenerateVectorZero); + Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64); + Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96); + Add(Instruction.ZeroExtend16, GenerateZeroExtend16); + Add(Instruction.ZeroExtend32, GenerateZeroExtend32); + Add(Instruction.ZeroExtend8, GenerateZeroExtend8); + } + + private static void Add(Instruction inst, Action<CodeGenContext, Operation> func) + { + _instTable[(int)inst] = func; + } + + public static CompiledFunction Generate(CompilerContext cctx) + { + ControlFlowGraph cfg = cctx.Cfg; + + Logger.StartPass(PassName.Optimization); + + if ((cctx.Options & CompilerOptions.SsaForm) != 0 && + (cctx.Options & CompilerOptions.Optimize) != 0) + { + Optimizer.RunPass(cfg); + } + + Logger.EndPass(PassName.Optimization, cfg); + + Logger.StartPass(PassName.PreAllocation); + + StackAllocator stackAlloc = new StackAllocator(); + + PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs); + + Logger.EndPass(PassName.PreAllocation, cfg); + + Logger.StartPass(PassName.RegisterAllocation); + + if ((cctx.Options & CompilerOptions.SsaForm) != 0) + { + Ssa.Deconstruct(cfg); + } + + IRegisterAllocator regAlloc; + + if ((cctx.Options & CompilerOptions.Lsra) != 0) + { + regAlloc = new LinearScanAllocator(); + } + else + { + regAlloc = new HybridAllocator(); + } + + RegisterMasks regMasks = new RegisterMasks( + CallingConvention.GetIntAvailableRegisters(), + CallingConvention.GetVecAvailableRegisters(), + CallingConvention.GetIntCallerSavedRegisters(), + CallingConvention.GetVecCallerSavedRegisters(), + CallingConvention.GetIntCalleeSavedRegisters(), + CallingConvention.GetVecCalleeSavedRegisters()); + + AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks); + + 
Logger.EndPass(PassName.RegisterAllocation, cfg); + + Logger.StartPass(PassName.CodeGeneration); + + using (MemoryStream stream = new MemoryStream()) + { + CodeGenContext context = new CodeGenContext(stream, allocResult, maxCallArgs, cfg.Blocks.Count); + + UnwindInfo unwindInfo = WritePrologue(context); + + foreach (BasicBlock block in cfg.Blocks) + { + context.EnterBlock(block); + + foreach (Node node in block.Operations) + { + if (node is Operation operation) + { + GenerateOperation(context, operation); + } + } + } + + Logger.EndPass(PassName.CodeGeneration); + + return new CompiledFunction(context.GetCode(), unwindInfo); + } + } + + private static void GenerateOperation(CodeGenContext context, Operation operation) + { + if (operation.Instruction == Instruction.Extended) + { + IntrinsicOperation intrinOp = (IntrinsicOperation)operation; + + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinOp.Intrinsic); + + switch (info.Type) + { + case IntrinsicType.Comis_: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + switch (intrinOp.Intrinsic) + { + case Intrinsic.X86Comisdeq: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Equal); + break; + + case Intrinsic.X86Comisdge: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.AboveOrEqual); + break; + + case Intrinsic.X86Comisdlt: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Below); + break; + + case Intrinsic.X86Comisseq: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Equal); + break; + + case Intrinsic.X86Comissge: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.AboveOrEqual); + break; + + case Intrinsic.X86Comisslt: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Below); + break; + } + + context.Assembler.Movzx8(dest, dest, 
OperandType.I32); + + break; + } + + case IntrinsicType.PopCount: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Popcnt(dest, source, dest.Type); + + break; + } + + case IntrinsicType.Unary: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, source); + + break; + } + + case IntrinsicType.UnaryToGpr: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type); + + break; + } + + case IntrinsicType.Binary: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger()); + Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); + + break; + } + + case IntrinsicType.BinaryImm: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte()); + + break; + } + + case IntrinsicType.Ternary: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + 
EnsureSameType(dest, src1, src2, src3); + + Debug.Assert(!dest.Type.IsInteger()); + + if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3); + } + else + { + EnsureSameReg(dest, src1); + + Debug.Assert(src3.GetRegister().Index == 0); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); + } + + break; + } + + case IntrinsicType.TernaryImm: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src1, src2); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte()); + + break; + } + } + } + else + { + Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction]; + + if (func != null) + { + func(context, operation); + } + else + { + throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\"."); + } + } + } + + private static void GenerateAdd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Add(dest, src2, dest.Type); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Addss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Addsd(dest, src1, src2); + } + } + + private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); 
+ + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.And(dest, src2, dest.Type); + } + + private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Xor(dest, src2, dest.Type); + } + else + { + context.Assembler.Xorps(dest, src1, src2); + } + } + + private static void GenerateBitwiseNot(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Not(dest); + } + + private static void GenerateBitwiseOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Or(dest, src2, dest.Type); + } + + private static void GenerateBranch(CodeGenContext context, Operation operation) + { + context.JumpTo(context.CurrBlock.Branch); + } + + private static void GenerateBranchIfFalse(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(0); + + context.Assembler.Test(source, source, source.Type); + + context.JumpTo(X86Condition.Equal, context.CurrBlock.Branch); + } + + private static void GenerateBranchIfTrue(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(0); + + context.Assembler.Test(source, source, source.Type); + + context.JumpTo(X86Condition.NotEqual, context.CurrBlock.Branch); + } + + private static void GenerateByteSwap(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, 
source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Bswap(dest); + } + + private static void GenerateCall(CodeGenContext context, Operation operation) + { + context.Assembler.Call(operation.GetSource(0)); + } + + private static void GenerateClobber(CodeGenContext context, Operation operation) + { + // This is only used to indicate that a register is clobbered to the + // register allocator, we don't need to produce any code. + } + + private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(0); + + MemoryOperand memOp = new MemoryOperand(OperandType.I64, source); + + context.Assembler.Cmpxchg16b(memOp); + } + + private static void GenerateCompareEqual(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Equal); + } + + private static void GenerateCompareGreater(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Greater); + } + + private static void GenerateCompareGreaterOrEqual(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.GreaterOrEqual); + } + + private static void GenerateCompareGreaterOrEqualUI(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.AboveOrEqual); + } + + private static void GenerateCompareGreaterUI(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Above); + } + + private static void GenerateCompareLess(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Less); + } + + private static void GenerateCompareLessOrEqual(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.LessOrEqual); + } + + private static void GenerateCompareLessOrEqualUI(CodeGenContext context, Operation operation) + { + GenerateCompare(context, 
operation, X86Condition.BelowOrEqual); + } + + private static void GenerateCompareLessUI(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.Below); + } + + private static void GenerateCompareNotEqual(CodeGenContext context, Operation operation) + { + GenerateCompare(context, operation, X86Condition.NotEqual); + } + + private static void GenerateCompare(CodeGenContext context, Operation operation, X86Condition condition) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(src1, src2); + + Debug.Assert(dest.Type == OperandType.I32); + + context.Assembler.Cmp(src1, src2, src1.Type); + context.Assembler.Setcc(dest, condition); + context.Assembler.Movzx8(dest, dest, OperandType.I32); + } + + private static void GenerateConditionalSelect(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameReg (dest, src3); + EnsureSameType(dest, src2, src3); + + Debug.Assert(dest.Type.IsInteger()); + Debug.Assert(src1.Type == OperandType.I32); + + context.Assembler.Test (src1, src1, src1.Type); + context.Assembler.Cmovcc(dest, src2, dest.Type, X86Condition.NotEqual); + } + + private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64); + + context.Assembler.Mov(dest, source, OperandType.I32); + } + + private static void GenerateConvertToFP(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + + if (dest.Type == 
OperandType.FP32) + { + Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP64); + + if (source.Type.IsInteger()) + { + context.Assembler.Xorps (dest, dest, dest); + context.Assembler.Cvtsi2ss(dest, dest, source, source.Type); + } + else /* if (source.Type == OperandType.FP64) */ + { + context.Assembler.Cvtsd2ss(dest, dest, source); + + GenerateZeroUpper96(context, dest, dest); + } + } + else /* if (dest.Type == OperandType.FP64) */ + { + Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP32); + + if (source.Type.IsInteger()) + { + context.Assembler.Xorps (dest, dest, dest); + context.Assembler.Cvtsi2sd(dest, dest, source, source.Type); + } + else /* if (source.Type == OperandType.FP32) */ + { + context.Assembler.Cvtss2sd(dest, dest, source); + + GenerateZeroUpper64(context, dest, dest); + } + } + } + + private static void GenerateCopy(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant); + + // Moves to the same register are useless. + if (dest.Kind == source.Kind && dest.Value == source.Value) + { + return; + } + + if (dest.Kind == OperandKind.Register && + source.Kind == OperandKind.Constant && source.Value == 0) + { + // Assemble "mov reg, 0" as "xor reg, reg" as the later is more efficient. 
+ context.Assembler.Xor(dest, dest, OperandType.I32); + } + else if (dest.Type.IsInteger()) + { + context.Assembler.Mov(dest, source, dest.Type); + } + else + { + context.Assembler.Movdqu(dest, source); + } + } + + private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Bsr(dest, source, dest.Type); + + int operandSize = dest.Type == OperandType.I32 ? 32 : 64; + int operandMask = operandSize - 1; + + // When the input operand is 0, the result is undefined, however the + // ZF flag is set. We are supposed to return the operand size on that + // case. So, add an additional jump to handle that case, by moving the + // operand size constant to the destination register. + context.JumpToNear(X86Condition.NotEqual); + + context.Assembler.Mov(dest, new Operand(operandSize | operandMask), OperandType.I32); + + context.JumpHere(); + + // BSR returns the zero based index of the last bit set on the operand, + // starting from the least significant bit. However we are supposed to + // return the number of 0 bits on the high end. So, we invert the result + // of the BSR using XOR to get the correct value. 
+ context.Assembler.Xor(dest, new Operand(operandMask), OperandType.I32); + } + + private static void GenerateCpuId(CodeGenContext context, Operation operation) + { + context.Assembler.Cpuid(); + } + + private static void GenerateDivide(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + if (!dest.Type.IsInteger()) + { + ValidateBinOp(dest, dividend, divisor); + } + + if (dest.Type.IsInteger()) + { + divisor = operation.GetSource(2); + + EnsureSameType(dest, divisor); + + if (divisor.Type == OperandType.I32) + { + context.Assembler.Cdq(); + } + else + { + context.Assembler.Cqo(); + } + + context.Assembler.Idiv(divisor); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Divss(dest, dividend, divisor); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Divsd(dest, dividend, divisor); + } + } + + private static void GenerateDivideUI(CodeGenContext context, Operation operation) + { + Operand divisor = operation.GetSource(2); + + Operand rdx = Register(X86Register.Rdx); + + Debug.Assert(divisor.Type.IsInteger()); + + context.Assembler.Xor(rdx, rdx, OperandType.I32); + context.Assembler.Div(divisor); + } + + private static void GenerateFill(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + + MemoryOperand memOp = new MemoryOperand(dest.Type, rsp, null, Multiplier.x1, offs); + + GenerateLoad(context, memOp, dest); + } + + private static void GenerateLoad(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + GenerateLoad(context, address, value); + } + + 
private static void GenerateLoad16(CodeGenContext context, Operation operation)
{
    // 16-bit load: reads from the address in source 0 into the integer
    // destination through Assembler.Movzx16.
    Operand target = operation.Destination;
    Operand location = Memory(operation.GetSource(0), target.Type);

    Debug.Assert(target.Type.IsInteger());

    context.Assembler.Movzx16(target, location, target.Type);
}

private static void GenerateLoad8(CodeGenContext context, Operation operation)
{
    // 8-bit counterpart of GenerateLoad16, emitted through Assembler.Movzx8.
    Operand target = operation.Destination;
    Operand location = Memory(operation.GetSource(0), target.Type);

    Debug.Assert(target.Type.IsInteger());

    context.Assembler.Movzx8(target, location, target.Type);
}

private static void GenerateMultiply(CodeGenContext context, Operation operation)
{
    Operand product = operation.Destination;
    Operand lhs = operation.GetSource(0);
    Operand rhs = operation.GetSource(1);

    bool rhsIsConstant = rhs.Kind == OperandKind.Constant;

    // Only the constant form names its destination separately; in every
    // other case the destination has to coincide with the first source.
    if (!rhsIsConstant)
    {
        EnsureSameReg(product, lhs);
    }

    EnsureSameType(product, lhs, rhs);

    if (!product.Type.IsInteger())
    {
        // Scalar floating-point multiply, single or double precision.
        if (product.Type == OperandType.FP32)
        {
            context.Assembler.Mulss(product, lhs, rhs);
        }
        else
        {
            context.Assembler.Mulsd(product, lhs, rhs);
        }

        return;
    }

    if (rhsIsConstant)
    {
        context.Assembler.Imul(product, lhs, rhs, product.Type);
    }
    else
    {
        context.Assembler.Imul(product, rhs, product.Type);
    }
}

private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
{
    // Signed variant: only source 1 is passed to the single-operand Imul.
    Operand multiplier = operation.GetSource(1);

    Debug.Assert(multiplier.Type == OperandType.I64);

    context.Assembler.Imul(multiplier);
}

private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
{
    // Unsigned variant: only source 1 is passed to the single-operand Mul.
    Operand multiplier = operation.GetSource(1);

    Debug.Assert(multiplier.Type == OperandType.I64);

    context.Assembler.Mul(multiplier);
}

private static void GenerateNegate(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    ValidateUnOp(target, input);

    Debug.Assert(target.Type.IsInteger());

    // Neg works in place on the destination, which ValidateUnOp checks
    // (in debug builds) against the source.
    context.Assembler.Neg(target);
}

private static void GenerateReturn(CodeGenContext context, Operation operation)
{
    // The epilogue has to be written before the return instruction itself.
    WriteEpilogue(context);

    context.Assembler.Return();
}

private static void GenerateRotateRight(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);
    Operand amount = operation.GetSource(1);

    ValidateShift(target, input, amount);

    context.Assembler.Ror(target, amount, target.Type);
}

private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);
    Operand amount = operation.GetSource(1);

    ValidateShift(target, input, amount);

    context.Assembler.Shl(target, amount, target.Type);
}

private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);
    Operand amount = operation.GetSource(1);

    ValidateShift(target, input, amount);

    context.Assembler.Sar(target, amount, target.Type);
}

private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);
    Operand amount = operation.GetSource(1);

    ValidateShift(target, input, amount);

    context.Assembler.Shr(target, amount, target.Type);
}

private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    Debug.Assert(target.Type.IsInteger() && input.Type.IsInteger());

    context.Assembler.Movsx16(target, input, target.Type);
}

private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    Debug.Assert(target.Type.IsInteger() && input.Type.IsInteger());

    context.Assembler.Movsx32(target, input, target.Type);
}

private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    Debug.Assert(target.Type.IsInteger() && input.Type.IsInteger());

    context.Assembler.Movsx8(target, input, target.Type);
}

// Spill whose slot offset is relative to the end of the call arguments region.
private static void GenerateSpill(CodeGenContext context, Operation operation)
    => GenerateSpill(context, operation, context.CallArgsRegionSize);

// Spill whose slot offset is taken as-is (base offset 0).
private static void GenerateSpillArg(CodeGenContext context, Operation operation)
    => GenerateSpill(context, operation, 0);

private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
{
    // Source 0 is the constant slot offset, source 1 the value being stored.
    Operand slot = operation.GetSource(0);
    Operand spilled = operation.GetSource(1);

    Debug.Assert(slot.Kind == OperandKind.Constant);

    int displacement = slot.AsInt32() + baseOffset;

    Operand stackPointer = Register(X86Register.Rsp);

    MemoryOperand stackSlot = new MemoryOperand(spilled.Type, stackPointer, null, Multiplier.x1, displacement);

    GenerateStore(context, stackSlot, spilled);
}

private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
{
    // Computes RSP + (constant offset + call arguments region size) into
    // the destination with LEA; no memory is accessed here.
    Operand target = operation.Destination;
    Operand slot = operation.GetSource(0);

    Debug.Assert(slot.Kind == OperandKind.Constant);

    int displacement = slot.AsInt32() + context.CallArgsRegionSize;

    Operand stackPointer = Register(X86Register.Rsp);

    MemoryOperand stackSlot = new MemoryOperand(OperandType.I64, stackPointer, null, Multiplier.x1, displacement);

    context.Assembler.Lea(target, stackSlot, OperandType.I64);
}

private static void GenerateStore(CodeGenContext context, Operation operation)
{
    // Source 0 is the address, source 1 the value; the value's type picks
    // the store instruction in the (address, value) overload.
    Operand stored = operation.GetSource(1);
    Operand location = Memory(operation.GetSource(0), stored.Type);

    GenerateStore(context, location, stored);
}

private static void GenerateStore16(CodeGenContext context, Operation operation)
{
    Operand stored = operation.GetSource(1);
    Operand location = Memory(operation.GetSource(0), stored.Type);

    Debug.Assert(stored.Type.IsInteger());

    context.Assembler.Mov16(location, stored);
}

private static void GenerateStore8(CodeGenContext context, Operation operation)
{
    Operand stored = operation.GetSource(1);
    Operand location = Memory(operation.GetSource(0), stored.Type);

    Debug.Assert(stored.Type.IsInteger());

    context.Assembler.Mov8(location, stored);
}

private static void GenerateSubtract(CodeGenContext context, Operation operation)
{
    Operand difference = operation.Destination;
    Operand lhs = operation.GetSource(0);
    Operand rhs = operation.GetSource(1);

    ValidateBinOp(difference, lhs, rhs);

    if (difference.Type.IsInteger())
    {
        // Two-operand form: the destination doubles as the left-hand side.
        context.Assembler.Sub(difference, rhs, difference.Type);

        return;
    }

    if (difference.Type == OperandType.FP32)
    {
        context.Assembler.Subss(difference, lhs, rhs);
    }
    else
    {
        // Anything that is neither integer nor FP32 is handled as FP64.
        context.Assembler.Subsd(difference, lhs, rhs);
    }
}

private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
{
    Operand vector = operation.Destination;
    Operand scalar = operation.GetSource(0);

    Debug.Assert(!vector.Type.IsInteger() && scalar.Type.IsInteger());

    // Movd for a 32-bit integer source, Movq for anything else (I64).
    if (scalar.Type == OperandType.I32)
    {
        context.Assembler.Movd(vector, scalar);
    }
    else
    {
        context.Assembler.Movq(vector, scalar);
    }
}

private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
{
    Operand result = operation.Destination;   // Extracted value.
    Operand vector = operation.GetSource(0);  // Vector being read.
    Operand laneOp = operation.GetSource(1);  // Constant lane index.

    Debug.Assert(vector.Type == OperandType.V128);
    Debug.Assert(laneOp.Kind == OperandKind.Constant);

    byte lane = laneOp.AsByte();

    if (result.Type == OperandType.I32)
    {
        Debug.Assert(lane < 4);

        if (HardwareCapabilities.SupportsSse41)
        {
            context.Assembler.Pextrd(result, vector, lane);
        }
        else if (lane == 0)
        {
            context.Assembler.Movd(result, vector);
        }
        else
        {
            // No PEXTRD: rotate the lanes so the wanted one sits in lane 0,
            // read it with MOVD, then apply the complementary rotation so
            // the source vector ends up as it was.
            int bringDown = 0b11_10_01_00;
            int putBack = 0b11_10_01_00;

            bringDown = BitUtils.RotateRight(bringDown, lane * 2, 8);
            putBack = BitUtils.RotateRight(putBack, 8 - lane * 2, 8);

            context.Assembler.Pshufd(vector, vector, (byte)bringDown);
            context.Assembler.Movd(result, vector);
            context.Assembler.Pshufd(vector, vector, (byte)putBack);
        }
    }
    else if (result.Type == OperandType.I64)
    {
        Debug.Assert(lane < 2);

        if (HardwareCapabilities.SupportsSse41)
        {
            context.Assembler.Pextrq(result, vector, lane);
        }
        else if (lane == 0)
        {
            context.Assembler.Movq(result, vector);
        }
        else
        {
            // Swapping the two 64-bit halves is its own inverse, so the
            // same selector is used before and after the read.
            const byte swapHalves = 0b01_00_11_10;

            context.Assembler.Pshufd(vector, vector, swapHalves);
            context.Assembler.Movq(result, vector);
            context.Assembler.Pshufd(vector, vector, swapHalves);
        }
    }
    else
    {
        // Floating-point destinations (FP32 or FP64).
        bool isSingle = result.Type == OperandType.FP32;

        Debug.Assert(lane < (isSingle ? 4 : 2));

        bool inUpperHalf = (lane >= 2 && isSingle) ||
                           (lane == 1 && result.Type == OperandType.FP64);

        if (inUpperHalf)
        {
            context.Assembler.Movhlps(result, result, vector);
            context.Assembler.Movq(result, result);
        }
        else
        {
            context.Assembler.Movq(result, vector);
        }

        if (isSingle)
        {
            // Lane 0 takes element (lane & 1); the remaining selector bits are 0xfc.
            context.Assembler.Pshufd(result, result, (byte)(0xfc | (lane & 1)));
        }
    }
}

private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
{
    Operand result = operation.Destination;   // Extracted value.
    Operand vector = operation.GetSource(0);  // Vector being read.
    Operand laneOp = operation.GetSource(1);  // Constant lane index.

    Debug.Assert(vector.Type == OperandType.V128);
    Debug.Assert(laneOp.Kind == OperandKind.Constant);

    byte lane = laneOp.AsByte();

    Debug.Assert(lane < 8);

    context.Assembler.Pextrw(result, vector, lane);
}

private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
{
    Operand result = operation.Destination;   // Extracted value.
    Operand vector = operation.GetSource(0);  // Vector being read.
    Operand laneOp = operation.GetSource(1);  // Constant lane index.

    Debug.Assert(vector.Type == OperandType.V128);
    Debug.Assert(laneOp.Kind == OperandKind.Constant);

    byte lane = laneOp.AsByte();

    Debug.Assert(lane < 16);

    if (HardwareCapabilities.SupportsSse41)
    {
        context.Assembler.Pextrb(result, vector, lane);

        return;
    }

    // Without SSE 4.1, pull out the 16-bit word that contains the byte and
    // then keep only the half that was asked for.
    context.Assembler.Pextrw(result, vector, (byte)(lane >> 1));

    bool wantsHighByte = (lane & 1) != 0;

    if (wantsHighByte)
    {
        context.Assembler.Shr(result, new Operand(8), OperandType.I32);
    }
    else
    {
        context.Assembler.Movzx8(result, result, OperandType.I32);
    }
}

private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
{
    Operand dest = operation.Destination;
    Operand vector = operation.GetSource(0);  // Vector receiving the element.
    Operand value = operation.GetSource(1);   // Element to insert.
    Operand laneOp = operation.GetSource(2);  // Constant lane index.

    if (!HardwareCapabilities.SupportsVexEncoding)
    {
        EnsureSameReg(dest, vector);
    }

    Debug.Assert(vector.Type == OperandType.V128);
    Debug.Assert(laneOp.Kind == OperandKind.Constant);

    byte lane = laneOp.AsByte();

    // SSE2-only integer insert: the value is written 16 bits at a time.
    // The call sites below pass 2 words for I32 and 4 for I64, so the
    // rotations add up to the full width of the value.
    void InsertWordByWord(int wordCount)
    {
        if (dest.GetRegister() != vector.GetRegister())
        {
            context.Assembler.Movdqu(dest, vector);
        }

        for (int step = 0; step < wordCount; step++)
        {
            // Write the current low 16 bits into the next word slot...
            context.Assembler.Pinsrw(dest, dest, value, (byte)(lane * wordCount + step));

            // ...then rotate so the following 16 bits become the low ones.
            context.Assembler.Ror(value, new Operand(16), value.Type);
        }
    }

    if (value.Type == OperandType.I32)
    {
        Debug.Assert(lane < 4);

        if (HardwareCapabilities.SupportsSse41)
        {
            context.Assembler.Pinsrd(dest, vector, value, lane);
        }
        else
        {
            InsertWordByWord(2);
        }
    }
    else if (value.Type == OperandType.I64)
    {
        Debug.Assert(lane < 2);

        if (HardwareCapabilities.SupportsSse41)
        {
            context.Assembler.Pinsrq(dest, vector, value, lane);
        }
        else
        {
            InsertWordByWord(4);
        }
    }
    else if (value.Type == OperandType.FP32)
    {
        Debug.Assert(lane < 4);

        if (lane == 0)
        {
            context.Assembler.Movss(dest, vector, value);
        }
        else if (HardwareCapabilities.SupportsSse41)
        {
            context.Assembler.Insertps(dest, vector, value, (byte)(lane << 4));
        }
        else if (vector.GetRegister() == value.GetRegister())
        {
            // The value is lane 0 of the vector's own register: clear the
            // selector field of the target lane so it is filled from lane 0.
            int selector = 0b11_10_01_00;

            selector &= ~(0b11 << (lane * 2));

            context.Assembler.Pshufd(dest, vector, (byte)selector);
        }
        else
        {
            int bringDown = 0b11_10_01_00;
            int putBack = 0b11_10_01_00;

            bringDown = BitUtils.RotateRight(bringDown, lane * 2, 8);
            putBack = BitUtils.RotateRight(putBack, 8 - lane * 2, 8);

            // Move the lane that will be replaced down to position 0.
            context.Assembler.Pshufd(vector, vector, (byte)bringDown);

            // dest[127:0] = vector[127:32] | value[31:0]
            context.Assembler.Movss(dest, vector, value);

            // Rotate the inserted lane back up to its real position.
            context.Assembler.Pshufd(dest, dest, (byte)putBack);

            if (dest.GetRegister() != vector.GetRegister())
            {
                // The source vector was shuffled in place above; undo that.
                context.Assembler.Pshufd(vector, vector, (byte)putBack);
            }
        }
    }
    else
    {
        // Remaining case: the value is FP64.
        Debug.Assert(lane < 2);

        if (lane == 0)
        {
            context.Assembler.Movsd(dest, vector, value);
        }
        else
        {
            context.Assembler.Movlhps(dest, vector, value);
        }
    }
}

private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
{
    Operand dest = operation.Destination;
    Operand vector = operation.GetSource(0);  // Vector receiving the element.
    Operand value = operation.GetSource(1);   // Element to insert.
    Operand laneOp = operation.GetSource(2);  // Constant lane index.

    if (!HardwareCapabilities.SupportsVexEncoding)
    {
        EnsureSameReg(dest, vector);
    }

    Debug.Assert(vector.Type == OperandType.V128);
    Debug.Assert(laneOp.Kind == OperandKind.Constant);

    byte lane = laneOp.AsByte();

    context.Assembler.Pinsrw(dest, vector, value, lane);
}

private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
{
    Operand dest = operation.Destination;
    Operand vector = operation.GetSource(0);  // Vector receiving the element.
    Operand value = operation.GetSource(1);   // Element to insert.
    Operand laneOp = operation.GetSource(2);  // Constant lane index.

    // Emulating this without SSE 4.1 would need a temporary register, so
    // that case is dealt with by the pre-allocator instead; by the time
    // this runs SSE 4.1 must be available.
    Debug.Assert(HardwareCapabilities.SupportsSse41);

    if (!HardwareCapabilities.SupportsVexEncoding)
    {
        EnsureSameReg(dest, vector);
    }

    Debug.Assert(vector.Type == OperandType.V128);
    Debug.Assert(laneOp.Kind == OperandKind.Constant);

    byte lane = laneOp.AsByte();

    context.Assembler.Pinsrb(dest, vector, value, lane);
}

private static void GenerateVectorOne(CodeGenContext context, Operation operation)
{
    Operand vector = operation.Destination;

    Debug.Assert(!vector.Type.IsInteger());

    // Comparing a register with itself for equality sets every bit.
    context.Assembler.Pcmpeqw(vector, vector, vector);
}

private static void GenerateVectorZero(CodeGenContext context, Operation operation)
{
    Operand vector = operation.Destination;

    Debug.Assert(!vector.Type.IsInteger());

    // XOR of a register with itself clears every bit.
    context.Assembler.Xorps(vector, vector, vector);
}

private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    Debug.Assert(target.Type == OperandType.V128 && input.Type == OperandType.V128);

    GenerateZeroUpper64(context, target, input);
}

private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    Debug.Assert(target.Type == OperandType.V128 && input.Type == OperandType.V128);

    GenerateZeroUpper96(context, target, input);
}

private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    Debug.Assert(target.Type.IsInteger() && input.Type.IsInteger());

    // Always emitted with a 32-bit operand type, whatever the destination type.
    context.Assembler.Movzx16(target, input, OperandType.I32);
}

private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    Debug.Assert(target.Type.IsInteger() && input.Type.IsInteger());

    // A plain 32-bit move is used for the 32-bit zero extension.
    context.Assembler.Mov(target, input, OperandType.I32);
}

private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
{
    Operand target = operation.Destination;
    Operand input = operation.GetSource(0);

    Debug.Assert(target.Type.IsInteger() && input.Type.IsInteger());

    // Always emitted with a 32-bit operand type, whatever the destination type.
    context.Assembler.Movzx8(target, input, OperandType.I32);
}

// Loads `value` from `address`, choosing the instruction from the value's type.
private static void GenerateLoad(CodeGenContext context, Operand address, Operand value)
{
    switch (value.Type)
    {
        case OperandType.I32:
        case OperandType.I64:
            // In these two cases value.Type is exactly the integer width to move.
            context.Assembler.Mov(value, address, value.Type);
            break;

        case OperandType.FP32:
            context.Assembler.Movd(value, address);
            break;

        case OperandType.FP64:
            context.Assembler.Movq(value, address);
            break;

        case OperandType.V128:
            context.Assembler.Movdqu(value, address);
            break;

        default:
            Debug.Assert(false);
            break;
    }
}

// Stores `value` to `address`, choosing the instruction from the value's type.
private static void GenerateStore(CodeGenContext context, Operand address, Operand value)
{
    switch (value.Type)
    {
        case OperandType.I32:
        case OperandType.I64:
            // In these two cases value.Type is exactly the integer width to move.
            context.Assembler.Mov(address, value, value.Type);
            break;

        case OperandType.FP32:
            context.Assembler.Movd(address, value);
            break;

        case OperandType.FP64:
            context.Assembler.Movq(address, value);
            break;

        case OperandType.V128:
            context.Assembler.Movdqu(address, value);
            break;

        default:
            Debug.Assert(false);
            break;
    }
}

// A single Movq from source to destination implements "zero upper 64".
private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source)
    => context.Assembler.Movq(dest, source);

private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source)
{
    // Same Movq as the 64-bit case, followed by a shuffle with selector 0xfc.
    context.Assembler.Movq(dest, source);
    context.Assembler.Pshufd(dest, dest, 0xfc);
}

// Debug-only sanity checks for a unary operation: destination and source
// must pass both EnsureSameReg and EnsureSameType.
private static void ValidateUnOp(Operand dest, Operand source)
{
#if DEBUG
    EnsureSameReg(dest, source);
    EnsureSameType(dest, source);
#endif
}
// Debug-only sanity checks for a binary operation: the destination must
// pass EnsureSameReg against the first source, and all three operands
// must have the same type.
private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
{
#if DEBUG
    EnsureSameReg (dest, src1);
    EnsureSameType(dest, src1, src2);
#endif
}

// Debug-only sanity checks for a shift/rotate: destination and first source
// must match in register and type, the destination must be an integer and
// the shift amount must be a 32-bit integer.
private static void ValidateShift(Operand dest, Operand src1, Operand src2)
{
#if DEBUG
    EnsureSameReg (dest, src1);
    EnsureSameType(dest, src1);

    Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
#endif
}

// Asserts that both operands are of the same kind (register or memory) and
// carry the same Value. The check is skipped for non-integer operands when
// VEX encoding is in use.
private static void EnsureSameReg(Operand op1, Operand op2)
{
    if (!op1.Type.IsInteger() && HardwareCapabilities.SupportsVexEncoding)
    {
        return;
    }

    Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
    Debug.Assert(op1.Kind == op2.Kind);
    Debug.Assert(op1.Value == op2.Value);
}

// Asserts that two operands have the same type.
private static void EnsureSameType(Operand op1, Operand op2)
{
    Debug.Assert(op1.Type == op2.Type);
}

// Asserts that three operands have the same type.
private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
{
    Debug.Assert(op1.Type == op2.Type);
    Debug.Assert(op1.Type == op3.Type);
}

// Asserts that four operands have the same type.
private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
{
    Debug.Assert(op1.Type == op2.Type);
    Debug.Assert(op1.Type == op3.Type);
    Debug.Assert(op1.Type == op4.Type);
}

// Number of bytes the prologue subtracts from RSP and the epilogue adds
// back: call arguments region + spill region + XMM save region. Both sides
// must use the very same value, so it is computed in one place.
private static int ComputeReservedStackSize(CodeGenContext context)
{
    return context.CallArgsRegionSize +
           context.AllocResult.SpillRegionSize +
           context.XmmSaveRegionSize;
}

// Emits the function prologue and returns the matching unwind information.
//
// Order of emission:
//   1. push every callee-saved integer register the allocator marked as
//      used, lowest register index first;
//   2. probe the stack when the frame is at least StackGuardSize bytes;
//   3. subtract the reserved frame size from RSP (skipped when it is 0);
//   4. store every used callee-saved vector register in 16-byte slots that
//      grow downwards from the top of the reserved area.
// Each saved register is recorded as an UnwindPushEntry together with the
// stream offset reached right after its save instruction.
private static UnwindInfo WritePrologue(CodeGenContext context)
{
    List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>();

    Operand rsp = Register(X86Register.Rsp);

    int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;

    while (mask != 0)
    {
        int bit = BitUtils.LowestBitSet(mask);

        context.Assembler.Push(Register((X86Register)bit));

        pushEntries.Add(new UnwindPushEntry(bit, RegisterType.Integer, context.StreamOffset));

        mask &= ~(1 << bit);
    }

    int reservedStackSize = ComputeReservedStackSize(context);

    if (reservedStackSize >= StackGuardSize)
    {
        GenerateInlineStackProbe(context, reservedStackSize);
    }

    if (reservedStackSize != 0)
    {
        context.Assembler.Sub(rsp, new Operand(reservedStackSize), OperandType.I64);
    }

    int offset = reservedStackSize;

    mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;

    while (mask != 0)
    {
        int bit = BitUtils.LowestBitSet(mask);

        offset -= 16;

        MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);

        context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));

        pushEntries.Add(new UnwindPushEntry(bit, RegisterType.Vector, context.StreamOffset));

        mask &= ~(1 << bit);
    }

    return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset, reservedStackSize);
}

// Emits the function epilogue, undoing WritePrologue step by step: reload
// the saved vector registers from the same slots (same iteration order and
// offsets as the prologue), release the reserved frame, then pop the saved
// integer registers highest index first, i.e. in reverse push order.
private static void WriteEpilogue(CodeGenContext context)
{
    Operand rsp = Register(X86Register.Rsp);

    int reservedStackSize = ComputeReservedStackSize(context);

    int offset = reservedStackSize;

    int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;

    while (mask != 0)
    {
        int bit = BitUtils.LowestBitSet(mask);

        offset -= 16;

        MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);

        context.Assembler.Movdqu(Xmm((X86Register)bit), memOp);

        mask &= ~(1 << bit);
    }

    if (reservedStackSize != 0)
    {
        context.Assembler.Add(rsp, new Operand(reservedStackSize), OperandType.I64);
    }

    mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;

    while (mask != 0)
    {
        int bit = BitUtils.HighestBitSet(mask);

        context.Assembler.Pop(Register((X86Register)bit));

        mask &= ~(1 << bit);
    }
}

// Emits one dummy 32-bit read per page below RSP for a frame of `size` bytes.
private static void GenerateInlineStackProbe(CodeGenContext context, int size)
{
    // Windows does lazy stack allocation, and there are just 2
    // guard pages on the end of the stack. So, if the allocation
    // size we make is greater than this guard size, we must ensure
    // that the OS will map all pages that we'll use. We do that by
    // doing a dummy read on those pages, forcing a page fault and
    // the OS to map them. If they are already mapped, nothing happens.
    const int pageMask = PageSize - 1;

    // Round the size up to a whole number of pages.
    size = (size + pageMask) & ~pageMask;

    Operand rsp = Register(X86Register.Rsp);

    // The integer return register is used as the throw-away destination of
    // the reads.
    Operand temp = Register(CallingConvention.GetIntReturnRegister());

    // Touches RSP - PageSize, RSP - 2 * PageSize, ... for every multiple of
    // the page size that is strictly below the rounded-up size.
    for (int offset = PageSize; offset < size; offset += PageSize)
    {
        Operand memOp = new MemoryOperand(OperandType.I32, rsp, null, Multiplier.x1, -offset);

        context.Assembler.Mov(temp, memOp, OperandType.I32);
    }
}

// Returns a memory operand for `operand`: the operand itself when its kind
// is already Memory, otherwise a new MemoryOperand of the given type built
// from it.
private static MemoryOperand Memory(Operand operand, OperandType type)
{
    if (operand.Kind == OperandKind.Memory)
    {
        return operand as MemoryOperand;
    }

    return new MemoryOperand(type, operand);
}

// Builds an integer register operand; the type defaults to 64-bit.
private static Operand Register(X86Register register, OperandType type = OperandType.I64)
{
    return new Operand((int)register, RegisterType.Integer, type);
}

// Builds a 128-bit vector register operand.
private static Operand Xmm(X86Register register)
{
    return new Operand((int)register, RegisterType.Vector, OperandType.V128);
}
} // Closes the enclosing class, opened before this chunk.
} // Closes the enclosing namespace, opened before this chunk.
\ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs new file mode 100644 index 00000000..7f930d6b --- /dev/null +++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs @@ -0,0 +1,52 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;

namespace ARMeilleure.CodeGen.X86
{
    // Exposes the instruction set extensions of the host CPU as booleans.
    //
    // The feature word is obtained once, in the static constructor, by
    // compiling and running a tiny function that returns the result of the
    // CpuId IR operation.
    //
    // NOTE(review): the bit numbers used below coincide with the CPUID leaf
    // 01H feature flags, ECX in bits 0-31 and EDX in bits 32-63. The packing
    // itself is done by the CpuId lowering, outside this file; confirm there.
    static class HardwareCapabilities
    {
        private delegate ulong GetFeatureInfo();

        // Bit offset of the upper 32-bit half of the feature word.
        private const int HighWord = 32;

        // Written exactly once by the static constructor.
        private static readonly ulong _featureInfo;

        public static bool SupportsSse3      => IsFeatureBitSet(0);
        public static bool SupportsPclmulqdq => IsFeatureBitSet(1);
        public static bool SupportsSsse3     => IsFeatureBitSet(9);
        public static bool SupportsFma       => IsFeatureBitSet(12);
        public static bool SupportsCx16      => IsFeatureBitSet(13);
        public static bool SupportsSse41     => IsFeatureBitSet(19);
        public static bool SupportsSse42     => IsFeatureBitSet(20);
        public static bool SupportsPopcnt    => IsFeatureBitSet(23);
        public static bool SupportsAesni     => IsFeatureBitSet(25);
        public static bool SupportsAvx       => IsFeatureBitSet(28);
        public static bool SupportsF16c      => IsFeatureBitSet(29);

        public static bool SupportsSse  => IsFeatureBitSet(HighWord + 25);
        public static bool SupportsSse2 => IsFeatureBitSet(HighWord + 26);

        // When set, VEX encoding is reported as unavailable even on CPUs
        // that support AVX.
        public static bool ForceLegacySse { get; set; }

        public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;

        static HardwareCapabilities()
        {
            // Build the IR for "return CpuId();" and compile it.
            //
            // NOTE(review): if the compiler queries this class while the
            // stub below is being compiled (on this same thread), it will
            // see _featureInfo == 0, i.e. every feature reported as missing,
            // because the field is only assigned at the end. Confirm the
            // stub does not depend on any optional extension.
            EmitterContext context = new EmitterContext();

            Operand featureInfo = context.CpuId();

            context.Return(featureInfo);

            ControlFlowGraph cfg = context.GetControlFlowGraph();

            // The generated function takes no arguments.
            OperandType[] argTypes = new OperandType[0];

            GetFeatureInfo getFeatureInfo = Compiler.Compile<GetFeatureInfo>(
                cfg,
                argTypes,
                OperandType.I64,
                CompilerOptions.HighCq);

            _featureInfo = getFeatureInfo();
        }

        // True when the given bit (0-63) of the feature word is set.
        private static bool IsFeatureBitSet(int bit)
        {
            return (_featureInfo & (1UL << bit)) != 0;
        }
    }
}
\ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs new file mode 100644 index 00000000..b1af352b --- /dev/null +++ b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs @@ -0,0 +1,14 @@
namespace ARMeilleure.CodeGen.X86
{
    // Immutable pair describing one intrinsic: the x86 instruction it maps
    // to and the IntrinsicType associated with it.
    struct IntrinsicInfo
    {
        private readonly X86Instruction _inst;
        private readonly IntrinsicType _type;

        public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
        {
            _inst = inst;
            _type = type;
        }

        // Instruction given at construction.
        public X86Instruction Inst => _inst;

        // Intrinsic type given at construction.
        public IntrinsicType Type => _type;
    }
}
\ No newline at end of file diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs new file mode 100644 index 00000000..e225f254 --- /dev/null +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -0,0 +1,160 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.X86 +{ + static class IntrinsicTable + { + private const int BadOp = 0; + + private static IntrinsicInfo[] _intrinTable; + + static IntrinsicTable() + { + _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))]; + + Add(Intrinsic.X86Addpd, new IntrinsicInfo(X86Instruction.Addpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary)); + Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary)); + Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary)); + Add(Intrinsic.X86Cmppd, new IntrinsicInfo(X86Instruction.Cmppd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpps, new IntrinsicInfo(X86Instruction.Cmpps, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpsd, new IntrinsicInfo(X86Instruction.Cmpsd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpss, new IntrinsicInfo(X86Instruction.Cmpss, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Comisdeq, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisdge, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisdlt, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + 
Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtpd2ps, new IntrinsicInfo(X86Instruction.Cvtpd2ps, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtps2dq, new IntrinsicInfo(X86Instruction.Cvtps2dq, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr)); + Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary)); + Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary)); + Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary)); + Add(Intrinsic.X86Divsd, new IntrinsicInfo(X86Instruction.Divsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary)); + Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxss, new IntrinsicInfo(X86Instruction.Maxss, IntrinsicType.Binary)); + Add(Intrinsic.X86Minpd, new IntrinsicInfo(X86Instruction.Minpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Minps, new 
IntrinsicInfo(X86Instruction.Minps, IntrinsicType.Binary)); + Add(Intrinsic.X86Minsd, new IntrinsicInfo(X86Instruction.Minsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Minss, new IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary)); + Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary)); + Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary)); + Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary)); + Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pavgw, new IntrinsicInfo(X86Instruction.Pavgw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pblendvb, new IntrinsicInfo(X86Instruction.Pblendvb, IntrinsicType.Ternary)); + Add(Intrinsic.X86Pcmpeqb, new IntrinsicInfo(X86Instruction.Pcmpeqb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqd, new IntrinsicInfo(X86Instruction.Pcmpeqd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqq, new IntrinsicInfo(X86Instruction.Pcmpeqq, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqw, new IntrinsicInfo(X86Instruction.Pcmpeqw, IntrinsicType.Binary)); + 
Add(Intrinsic.X86Pcmpgtb, new IntrinsicInfo(X86Instruction.Pcmpgtb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtd, new IntrinsicInfo(X86Instruction.Pcmpgtd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtq, new IntrinsicInfo(X86Instruction.Pcmpgtq, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtw, new IntrinsicInfo(X86Instruction.Pcmpgtw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsb, new IntrinsicInfo(X86Instruction.Pmaxsb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsd, new IntrinsicInfo(X86Instruction.Pmaxsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsw, new IntrinsicInfo(X86Instruction.Pmaxsw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxub, new IntrinsicInfo(X86Instruction.Pmaxub, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxud, new IntrinsicInfo(X86Instruction.Pmaxud, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxuw, new IntrinsicInfo(X86Instruction.Pmaxuw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsb, new IntrinsicInfo(X86Instruction.Pminsb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsd, new IntrinsicInfo(X86Instruction.Pminsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsw, new IntrinsicInfo(X86Instruction.Pminsw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminub, new IntrinsicInfo(X86Instruction.Pminub, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminud, new IntrinsicInfo(X86Instruction.Pminud, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminuw, new IntrinsicInfo(X86Instruction.Pminuw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmovsxbw, new IntrinsicInfo(X86Instruction.Pmovsxbw, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovsxdq, new IntrinsicInfo(X86Instruction.Pmovsxdq, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovsxwd, new IntrinsicInfo(X86Instruction.Pmovsxwd, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxbw, new IntrinsicInfo(X86Instruction.Pmovzxbw, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxdq, new IntrinsicInfo(X86Instruction.Pmovzxdq, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxwd, new 
IntrinsicInfo(X86Instruction.Pmovzxwd, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmulld, new IntrinsicInfo(X86Instruction.Pmulld, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmullw, new IntrinsicInfo(X86Instruction.Pmullw, IntrinsicType.Binary)); + Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount)); + Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary)); + Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary)); + Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psllw, new IntrinsicInfo(X86Instruction.Psllw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrad, new IntrinsicInfo(X86Instruction.Psrad, IntrinsicType.Binary)); + Add(Intrinsic.X86Psraw, new IntrinsicInfo(X86Instruction.Psraw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrld, new IntrinsicInfo(X86Instruction.Psrld, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrlq, new IntrinsicInfo(X86Instruction.Psrlq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrldq, new IntrinsicInfo(X86Instruction.Psrldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrlw, new IntrinsicInfo(X86Instruction.Psrlw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubb, new IntrinsicInfo(X86Instruction.Psubb, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubd, new IntrinsicInfo(X86Instruction.Psubd, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubq, new IntrinsicInfo(X86Instruction.Psubq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubw, new IntrinsicInfo(X86Instruction.Psubw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhbw, new IntrinsicInfo(X86Instruction.Punpckhbw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhdq, new IntrinsicInfo(X86Instruction.Punpckhdq, IntrinsicType.Binary)); + 
Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhwd, new IntrinsicInfo(X86Instruction.Punpckhwd, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpcklbw, new IntrinsicInfo(X86Instruction.Punpcklbw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckldq, new IntrinsicInfo(X86Instruction.Punpckldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpcklwd, new IntrinsicInfo(X86Instruction.Punpcklwd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pxor, new IntrinsicInfo(X86Instruction.Pxor, IntrinsicType.Binary)); + Add(Intrinsic.X86Rcpps, new IntrinsicInfo(X86Instruction.Rcpps, IntrinsicType.Unary)); + Add(Intrinsic.X86Rcpss, new IntrinsicInfo(X86Instruction.Rcpss, IntrinsicType.Unary)); + Add(Intrinsic.X86Roundpd, new IntrinsicInfo(X86Instruction.Roundpd, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundps, new IntrinsicInfo(X86Instruction.Roundps, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundsd, new IntrinsicInfo(X86Instruction.Roundsd, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary)); + Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary)); + Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary)); + 
Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary));
+            Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary));
+            Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary));
+            Add(Intrinsic.X86Subss, new IntrinsicInfo(X86Instruction.Subss, IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpckhpd, new IntrinsicInfo(X86Instruction.Unpckhpd, IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
+            Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
+            Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
+        }
+
+        // Stores the description of an intrinsic in the table slot selected by
+        // the numeric value of the intrinsic.
+        private static void Add(Intrinsic intrin, IntrinsicInfo info)
+        {
+            int slot = (int)intrin;
+
+            _intrinTable[slot] = info;
+        }
+
+        // Reads back the table entry stored for the given intrinsic.
+        public static IntrinsicInfo GetInfo(Intrinsic intrin) => _intrinTable[(int)intrin];
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
new file mode 100644
index 00000000..4e9b33e1
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    enum IntrinsicType // Category stored with each IntrinsicInfo entry of the intrinsic table.
+    {
+        Comis_, // NOTE(review): presumably the COMISx compare family; no user is visible in this hunk - confirm.
+        PopCount, // Assigned to X86Popcnt.
+        Unary, // Assigned to e.g. X86Sqrtps, X86Rcpps, X86Pmovsxbw.
+        UnaryToGpr, // NOTE(review): presumably a unary form whose result goes to a general purpose register - confirm.
+        Binary, // Assigned to most entries, e.g. X86Paddb, X86Mulps, X86Pxor.
+        BinaryImm, // Assigned to the X86Round* entries.
+        Ternary, // Assigned to X86Pblendvb.
+        TernaryImm // Assigned to X86Shufpd and X86Shufps.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
new file mode 100644
index 00000000..a1490131
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -0,0 +1,1280 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    using LLNode = LinkedListNode<Node>;
+
+    // Rewrites the IR of a function so that it satisfies the operand
+    // restrictions of the x86 instructions and of the current calling
+    // convention: constants are moved to locals where no immediate form exists,
+    // fixed register copies are inserted, and unsupported operations are
+    // expanded into supported sequences.
+    static class PreAllocator
+    {
+        // Runs the pass over every operation of every block of the function.
+        // maxCallArgs receives the largest argument count found on any call
+        // (counting the hidden return pointer of V128 returns), or -1 when the
+        // function makes no calls.
+        public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+        {
+            maxCallArgs = -1;
+
+            CallConvName callConv = CallingConvention.GetCurrentCallConv();
+
+            // NOTE(review): the System V LoadArgument handler indexes this array by
+            // argument index; confirm GetArgumentsOnRegsCount() is large enough there.
+            Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+            foreach (BasicBlock block in cctx.Cfg.Blocks)
+            {
+                LLNode nextNode;
+
+                for (LLNode node = block.Operations.First; node != null; node = nextNode)
+                {
+                    // The successor is captured before any handler runs, so nodes
+                    // inserted by the handlers below are not visited by this loop.
+                    nextNode = node.Next;
+
+                    if (!(node.Value is Operation operation))
+                    {
+                        continue;
+                    }
+
+                    HandleConstantCopy(node, operation);
+
+                    HandleSameDestSrc1Copy(node, operation);
+
+                    HandleFixedRegisterCopy(node, operation);
+
+                    switch (operation.Instruction)
+                    {
+                        case Instruction.Call:
+                            // Get the maximum number of arguments used on a call.
+                            // On windows, when a struct is returned from the call,
+                            // we also need to pass the pointer where the struct
+                            // should be written on the first argument.
+                            int argsCount = operation.SourcesCount - 1;
+
+                            if (operation.Destination != null && operation.Destination.Type == OperandType.V128)
+                            {
+                                argsCount++;
+                            }
+
+                            if (maxCallArgs < argsCount)
+                            {
+                                maxCallArgs = argsCount;
+                            }
+
+                            // Copy values to registers expected by the function
+                            // being called, as mandated by the ABI.
+                            if (callConv == CallConvName.Windows)
+                            {
+                                node = HandleCallWindowsAbi(stackAlloc, node, operation);
+                            }
+                            else /* if (callConv == CallConvName.SystemV) */
+                            {
+                                node = HandleCallSystemVAbi(node, operation);
+                            }
+                            break;
+
+                        case Instruction.ConvertToFPUI:
+                            HandleConvertToFPUI(node, operation);
+                            break;
+
+                        case Instruction.LoadArgument:
+                            if (callConv == CallConvName.Windows)
+                            {
+                                HandleLoadArgumentWindowsAbi(cctx, node, preservedArgs, operation);
+                            }
+                            else /* if (callConv == CallConvName.SystemV) */
+                            {
+                                HandleLoadArgumentSystemVAbi(cctx, node, preservedArgs, operation);
+                            }
+                            break;
+
+                        case Instruction.Negate:
+                            if (!operation.GetSource(0).Type.IsInteger())
+                            {
+                                node = HandleNegate(node, operation);
+                            }
+                            break;
+
+                        case Instruction.Return:
+                            if (callConv == CallConvName.Windows)
+                            {
+                                HandleReturnWindowsAbi(cctx, node, preservedArgs, operation);
+                            }
+                            else /* if (callConv == CallConvName.SystemV) */
+                            {
+                                HandleReturnSystemVAbi(node, operation);
+                            }
+                            break;
+
+                        case Instruction.VectorInsert8:
+                            if (!HardwareCapabilities.SupportsSse41)
+                            {
+                                node = HandleVectorInsert8(node, operation);
+                            }
+                            break;
+                    }
+                }
+            }
+        }
+
+        // Replaces constant operands on source 1 and source 2 with locals when
+        // the instruction can not take the constant directly. Only the first
+        // two sources are inspected, and intrinsic operations are left alone.
+        private static void HandleConstantCopy(LLNode node, Operation operation)
+        {
+            if (operation.SourcesCount == 0 || IsIntrinsic(operation.Instruction))
+            {
+                return;
+            }
+
+            Instruction inst = operation.Instruction;
+
+            Operand src1 = operation.GetSource(0);
+            Operand src2;
+
+            if (src1.Kind == OperandKind.Constant)
+            {
+                if (!src1.Type.IsInteger())
+                {
+                    // Handle non-integer types (FP32, FP64 and V128).
+                    // For instructions without an immediate operand, we do the following:
+                    // - Insert a copy with the constant value (as integer) to a GPR.
+                    // - Insert a copy from the GPR to a XMM register.
+                    // - Replace the constant use with the XMM register.
+                    src1 = AddXmmCopy(node, src1);
+
+                    operation.SetSource(0, src1);
+                }
+                else if (!HasConstSrc1(inst))
+                {
+                    // Handle integer types.
+                    // Most ALU instructions accepts a 32-bits immediate on the second operand.
+                    // We need to ensure the following:
+                    // - If the constant is on operand 1, we need to move it.
+                    // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+                    // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+                    // - If the constant is on operand 2, we check if the instruction supports it,
+                    // if not, we also add a copy. 64-bits constants are usually not supported.
+                    if (IsCommutative(inst))
+                    {
+                        src2 = operation.GetSource(1);
+
+                        Operand temp = src1;
+
+                        src1 = src2;
+                        src2 = temp;
+
+                        operation.SetSource(0, src1);
+                        operation.SetSource(1, src2);
+                    }
+
+                    // After a swap, source 1 is only still constant when both
+                    // operands were constants.
+                    if (src1.Kind == OperandKind.Constant)
+                    {
+                        src1 = AddCopy(node, src1);
+
+                        operation.SetSource(0, src1);
+                    }
+                }
+            }
+
+            if (operation.SourcesCount < 2)
+            {
+                return;
+            }
+
+            src2 = operation.GetSource(1);
+
+            if (src2.Kind == OperandKind.Constant)
+            {
+                if (!src2.Type.IsInteger())
+                {
+                    src2 = AddXmmCopy(node, src2);
+
+                    operation.SetSource(1, src2);
+                }
+                else if (!HasConstSrc2(inst) || IsLongConst(src2))
+                {
+                    src2 = AddCopy(node, src2);
+
+                    operation.SetSource(1, src2);
+                }
+            }
+        }
+
+        // Inserts the copies from/to the registers that some instructions use
+        // implicitly (compare and exchange, CPU Id, divide, 64-bits high
+        // multiply, variable shifts and non-VEX PBLENDVB).
+        // Returns the last node inserted after the operation, or the node
+        // itself when nothing was inserted after it.
+        private static LLNode HandleFixedRegisterCopy(LLNode node, Operation operation)
+        {
+            Operand dest = operation.Destination;
+
+            LinkedList<Node> nodes = node.List;
+
+            switch (operation.Instruction)
+            {
+                case Instruction.CompareAndSwap128:
+                {
+                    // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
+                    // - The expected value should be in RDX:RAX.
+                    // - The new value to be written should be in RCX:RBX.
+                    // - The value at the memory location is loaded to RDX:RAX.
+                    void SplitOperand(Operand source, Operand lr, Operand hr)
+                    {
+                        nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0)));
+                        nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
+                    }
+
+                    Operand rax = Gpr(X86Register.Rax, OperandType.I64);
+                    Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
+                    Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+                    Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+
+                    SplitOperand(operation.GetSource(1), rax, rdx);
+                    SplitOperand(operation.GetSource(2), rbx, rcx);
+
+                    // Rebuild the 128-bits result from RDX:RAX after the operation.
+                    node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax));
+                    node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
+
+                    operation.SetDestinations(new Operand[] { rdx, rax });
+
+                    operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
+
+                    break;
+                }
+
+                case Instruction.CpuId:
+                {
+                    // Handle the many restrictions of the CPU Id instruction:
+                    // - EAX controls the information returned by this instruction.
+                    // - When EAX is 1, feature information is returned.
+                    // - The information is written to registers EAX, EBX, ECX and EDX.
+                    Debug.Assert(dest.Type == OperandType.I64);
+
+                    Operand eax = Gpr(X86Register.Rax, OperandType.I32);
+                    Operand ebx = Gpr(X86Register.Rbx, OperandType.I32);
+                    Operand ecx = Gpr(X86Register.Rcx, OperandType.I32);
+                    Operand edx = Gpr(X86Register.Rdx, OperandType.I32);
+
+                    // Value 0x01 = Version, family and feature information.
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1)));
+
+                    // Copy results to the destination register.
+                    // The values are split into 2 32-bits registers, we merge them
+                    // into a single 64-bits register.
+                    Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx));
+                    node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32)));
+                    node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx));
+
+                    operation.SetDestinations(new Operand[] { eax, ebx, ecx, edx });
+
+                    operation.SetSources(new Operand[] { eax });
+
+                    break;
+                }
+
+                case Instruction.Divide:
+                case Instruction.DivideUI:
+                {
+                    // Handle the many restrictions of the division instructions:
+                    // - The dividend is always in RDX:RAX.
+                    // - The result is always in RAX.
+                    // - Additionally it also writes the remainder in RDX.
+                    if (dest.Type.IsInteger())
+                    {
+                        Operand src1 = operation.GetSource(0);
+
+                        Operand rax = Gpr(X86Register.Rax, src1.Type);
+                        Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+                        nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
+                        nodes.AddBefore(node, new Operation(Instruction.Clobber, rdx));
+
+                        node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
+
+                        operation.SetDestinations(new Operand[] { rdx, rax });
+
+                        operation.SetSources(new Operand[] { rdx, rax, operation.GetSource(1) });
+
+                        operation.Destination = rax;
+                    }
+
+                    break;
+                }
+
+                case Instruction.Extended:
+                {
+                    IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+
+                    // PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
+                    if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding)
+                    {
+                        Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
+
+                        nodes.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2)));
+
+                        operation.SetSource(2, xmm0);
+                    }
+
+                    break;
+                }
+
+                case Instruction.Multiply64HighSI:
+                case Instruction.Multiply64HighUI:
+                {
+                    // Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
+                    // - The multiplicand is always in RAX.
+                    // - The lower 64-bits of the result is always in RAX.
+                    // - The higher 64-bits of the result is always in RDX.
+                    Operand src1 = operation.GetSource(0);
+
+                    Operand rax = Gpr(X86Register.Rax, src1.Type);
+                    Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
+
+                    operation.SetSource(0, rax);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rdx));
+
+                    operation.SetDestinations(new Operand[] { rdx, rax });
+
+                    break;
+                }
+
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                {
+                    // The shift register is always implied to be CL (low 8-bits of RCX or ECX).
+                    if (operation.GetSource(1).Kind == OperandKind.LocalVariable)
+                    {
+                        Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
+
+                        nodes.AddBefore(node, new Operation(Instruction.Copy, rcx, operation.GetSource(1)));
+
+                        operation.SetSource(1, rcx);
+                    }
+
+                    break;
+                }
+            }
+
+            return node;
+        }
+
+        // Makes the destination and the first source the same operand for
+        // operations that require it (see IsSameOperandDestSrc1), and makes the
+        // destination and the third source the same for ConditionalSelect.
+        // Returns the last node inserted after the operation, or the node
+        // itself when nothing was inserted after it.
+        private static LLNode HandleSameDestSrc1Copy(LLNode node, Operation operation)
+        {
+            if (operation.Destination == null || operation.SourcesCount == 0)
+            {
+                return node;
+            }
+
+            Instruction inst = operation.Instruction;
+
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+
+            LinkedList<Node> nodes = node.List;
+
+            // The multiply instruction (that maps to IMUL) is somewhat special, it has
+            // a three operand form where the second source is a immediate value.
+            bool threeOperandForm = inst == Instruction.Multiply && operation.GetSource(1).Kind == OperandKind.Constant;
+
+            if (IsSameOperandDestSrc1(operation) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
+            {
+                bool useNewLocal = false;
+
+                for (int srcIndex = 1; srcIndex < operation.SourcesCount; srcIndex++)
+                {
+                    if (operation.GetSource(srcIndex) == dest)
+                    {
+                        useNewLocal = true;
+
+                        break;
+                    }
+                }
+
+                if (useNewLocal)
+                {
+                    // Dest is being used as some source already, we need to use a new
+                    // local to store the temporary value, otherwise the value on dest
+                    // local would be overwritten.
+                    Operand temp = Local(dest.Type);
+
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, temp, src1));
+
+                    operation.SetSource(0, temp);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, temp));
+
+                    operation.Destination = temp;
+                }
+                else
+                {
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, dest, src1));
+
+                    operation.SetSource(0, dest);
+                }
+            }
+            else if (inst == Instruction.ConditionalSelect)
+            {
+                Operand src2 = operation.GetSource(1);
+                Operand src3 = operation.GetSource(2);
+
+                // Same idea as above, but here it is the third source that has
+                // to share the operand with the destination.
+                if (src1 == dest || src2 == dest)
+                {
+                    Operand temp = Local(dest.Type);
+
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, temp, src3));
+
+                    operation.SetSource(2, temp);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, temp));
+
+                    operation.Destination = temp;
+                }
+                else
+                {
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, dest, src3));
+
+                    operation.SetSource(2, dest);
+                }
+            }
+
+            return node;
+        }
+
+        // Replaces an unsigned integer to FP conversion with an equivalent
+        // sequence built on the signed conversion, and deletes the original
+        // operation. Returns the last node of the inserted sequence.
+        private static LLNode HandleConvertToFPUI(LLNode node, Operation operation)
+        {
+            // Unsigned integer to FP conversions are not supported on X86.
+            // We need to turn them into signed integer to FP conversions, and
+            // adjust the final result.
+            Operand dest = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");
+
+            LinkedList<Node> nodes = node.List;
+
+            LLNode currentNode = node;
+
+            if (source.Type == OperandType.I32)
+            {
+                // For 32-bits integers, we can just zero-extend to 64-bits,
+                // and then use the 64-bits signed conversion instructions.
+                Operand zex = Local(OperandType.I64);
+
+                node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, zex, source));
+                node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, dest, zex));
+            }
+            else /* if (source.Type == OperandType.I64) */
+            {
+                // For 64-bits integers, we need to do the following:
+                // - Ensure that the integer has the most significant bit clear.
+                // -- This can be done by shifting the value right by 1, that is, dividing by 2.
+                // -- The least significant bit is lost in this case though.
+                // - We can then convert the shifted value with a signed integer instruction.
+                // - The result still needs to be corrected after that.
+                // -- First, we need to multiply the result by 2, as we divided it by 2 before.
+                // --- This can be done efficiently by adding the result to itself.
+                // -- Then, we need to add the least significant bit that was shifted out.
+                // --- We can convert the least significant bit to float, and add it to the result.
+                Operand lsb = Local(OperandType.I64);
+                Operand half = Local(OperandType.I64);
+
+                Operand lsbF = Local(dest.Type);
+
+                node = nodes.AddAfter(node, new Operation(Instruction.Copy, lsb, source));
+                node = nodes.AddAfter(node, new Operation(Instruction.Copy, half, source));
+
+                node = nodes.AddAfter(node, new Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L)));
+                node = nodes.AddAfter(node, new Operation(Instruction.ShiftRightUI, half, half, Const(1)));
+
+                node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, lsbF, lsb));
+                node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, dest, half));
+
+                node = nodes.AddAfter(node, new Operation(Instruction.Add, dest, dest, dest));
+                node = nodes.AddAfter(node, new Operation(Instruction.Add, dest, dest, lsbF));
+            }
+
+            Delete(currentNode, operation);
+
+            return node;
+        }
+
+        // Replaces a FP negate with a XOR against a sign bit mask, and deletes
+        // the original operation. Returns the last node of the inserted sequence.
+        private static LLNode HandleNegate(LLNode node, Operation operation)
+        {
+            // There's no SSE FP negate instruction, so we need to transform that into
+            // a XOR of the value to be negated with a mask with the highest bit set.
+            // This also produces -0 for a negation of the value 0.
+            Operand dest = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type == OperandType.FP32 ||
+                         dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");
+
+            LinkedList<Node> nodes = node.List;
+
+            LLNode currentNode = node;
+
+            Operand res = Local(dest.Type);
+
+            // The mask is produced by VectorOne followed by a left shift by the
+            // element size minus one (31 for FP32, 63 for FP64).
+            node = nodes.AddAfter(node, new Operation(Instruction.VectorOne, res));
+
+            if (dest.Type == OperandType.FP32)
+            {
+                node = nodes.AddAfter(node, new IntrinsicOperation(Intrinsic.X86Pslld, res, res, Const(31)));
+            }
+            else /* if (dest.Type == OperandType.FP64) */
+            {
+                node = nodes.AddAfter(node, new IntrinsicOperation(Intrinsic.X86Psllq, res, res, Const(63)));
+            }
+
+            node = nodes.AddAfter(node, new IntrinsicOperation(Intrinsic.X86Xorps, res, res, source));
+
+            node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, res));
+
+            Delete(currentNode, operation);
+
+            return node;
+        }
+
+        // Expands a byte insertion into a 16-bits extract, a merge of the new
+        // byte into the right half of that value, and a 16-bits insert. The
+        // original operation is deleted. Returns the last inserted node.
+        private static LLNode HandleVectorInsert8(LLNode node, Operation operation)
+        {
+            // Handle vector insertion, when SSE 4.1 is not supported.
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            Debug.Assert(index < 16);
+
+            LinkedList<Node> nodes = node.List;
+
+            LLNode currentNode = node;
+
+            Operand temp1 = Local(OperandType.I32);
+            Operand temp2 = Local(OperandType.I32);
+
+            node = nodes.AddAfter(node, new Operation(Instruction.Copy, temp2, src2));
+
+            Operation vextOp = new Operation(Instruction.VectorExtract16, temp1, src1, Const(index >> 1));
+
+            node = nodes.AddAfter(node, vextOp);
+
+            if ((index & 1) != 0)
+            {
+                // Odd byte: keep the low byte of the element, new value goes on the high byte.
+                node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp1, temp1));
+                node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, temp2, temp2, Const(8)));
+                node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+            }
+            else
+            {
+                // Even byte: keep the high byte of the element, new value goes on the low byte.
+                node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp2, temp2));
+                node = nodes.AddAfter(node, new Operation(Instruction.BitwiseAnd, temp1, temp1, Const(0xff00)));
+                node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+            }
+
+            Operation vinsOp = new Operation(Instruction.VectorInsert16, dest, src1, temp1, Const(index >> 1));
+
+            node = nodes.AddAfter(node, vinsOp);
+
+            Delete(currentNode, operation);
+
+            return node;
+        }
+
+        // Inserts the argument and return value copies for a call: V128 values
+        // are passed and returned through stack memory, the first arguments go
+        // on registers and the remaining ones are written with SpillArg.
+        // Returns the last node inserted after the call, or the call node.
+        private static LLNode HandleCallWindowsAbi(StackAllocator stackAlloc, LLNode node, Operation operation)
+        {
+            Operand dest = operation.Destination;
+
+            LinkedList<Node> nodes = node.List;
+
+            // Handle struct arguments.
+            int retArgs = 0;
+
+            int stackAllocOffset = 0;
+
+            int AllocateOnStack(int size)
+            {
+                // We assume that the stack allocator is initially empty (TotalSize = 0).
+                // Taking that into account, we can reuse the space allocated for other
+                // calls by keeping track of our own allocated size (stackAllocOffset).
+                // If the space allocated is not big enough, then we just expand it.
+                int offset = stackAllocOffset;
+
+                if (stackAllocOffset + size > stackAlloc.TotalSize)
+                {
+                    stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize);
+                }
+
+                stackAllocOffset += size;
+
+                return offset;
+            }
+
+            Operand arg0Reg = null;
+
+            if (dest != null && dest.Type == OperandType.V128)
+            {
+                // The address where the V128 result should be written is passed
+                // on the first integer argument register.
+                int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes());
+
+                arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+                Operation allocOp = new Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
+
+                nodes.AddBefore(node, allocOp);
+
+                retArgs = 1;
+            }
+
+            int argsCount = operation.SourcesCount - 1;
+
+            int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
+
+            if (argsCount > maxArgs)
+            {
+                argsCount = maxArgs;
+            }
+
+            Operand[] sources = new Operand[1 + retArgs + argsCount];
+
+            sources[0] = operation.GetSource(0);
+
+            if (arg0Reg != null)
+            {
+                sources[1] = arg0Reg;
+            }
+
+            // V128 arguments are stored on the stack, and replaced on the call
+            // by the address of the stored value.
+            for (int index = 1; index < operation.SourcesCount; index++)
+            {
+                Operand source = operation.GetSource(index);
+
+                if (source.Type == OperandType.V128)
+                {
+                    Operand stackAddr = Local(OperandType.I64);
+
+                    int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes());
+
+                    nodes.AddBefore(node, new Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset)));
+
+                    Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source);
+
+                    HandleConstantCopy(nodes.AddBefore(node, storeOp), storeOp);
+
+                    operation.SetSource(index, stackAddr);
+                }
+            }
+
+            // Handle arguments passed on registers.
+            for (int index = 0; index < argsCount; index++)
+            {
+                Operand source = operation.GetSource(index + 1);
+
+                Operand argReg;
+
+                int argIndex = index + retArgs;
+
+                if (source.Type.IsInteger())
+                {
+                    argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type);
+                }
+                else
+                {
+                    argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type);
+                }
+
+                Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+                HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);
+
+                sources[1 + retArgs + index] = argReg;
+            }
+
+            // The remaining arguments (those that are not passed on registers)
+            // should be passed on the stack, we write them to the stack with "SpillArg".
+            for (int index = argsCount; index < operation.SourcesCount - 1; index++)
+            {
+                Operand source = operation.GetSource(index + 1);
+
+                Operand offset = new Operand((index + retArgs) * 8);
+
+                Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
+
+                HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
+            }
+
+            if (dest != null)
+            {
+                if (dest.Type == OperandType.V128)
+                {
+                    // Keep a copy of the result address taken before the call, and
+                    // load the result from it after the call.
+                    Operand retValueAddr = Local(OperandType.I64);
+
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, retValueAddr, arg0Reg));
+
+                    Operation loadOp = new Operation(Instruction.Load, dest, retValueAddr);
+
+                    node = nodes.AddAfter(node, loadOp);
+
+                    operation.Destination = null;
+                }
+                else
+                {
+                    Operand retReg = dest.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+                        : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+                    Operation copyOp = new Operation(Instruction.Copy, dest, retReg);
+
+                    node = nodes.AddAfter(node, copyOp);
+
+                    operation.Destination = retReg;
+                }
+            }
+
+            operation.SetSources(sources);
+
+            return node;
+        }
+
+        // Inserts the argument and return value copies for a call: integer and
+        // FP arguments use separate register sequences, a V128 takes two
+        // consecutive integer registers, and what does not fit on registers is
+        // written with SpillArg.
+        // Returns the last node inserted after the call, or the call node.
+        private static LLNode HandleCallSystemVAbi(LLNode node, Operation operation)
+        {
+            Operand dest = operation.Destination;
+
+            LinkedList<Node> nodes = node.List;
+
+            List<Operand> sources = new List<Operand>();
+
+            sources.Add(operation.GetSource(0));
+
+            int argsCount = operation.SourcesCount - 1;
+
+            int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+            int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+            int intCount = 0;
+            int vecCount = 0;
+
+            int stackOffset = 0;
+
+            for (int index = 0; index < argsCount; index++)
+            {
+                Operand source = operation.GetSource(index + 1);
+
+                bool passOnReg;
+
+                if (source.Type.IsInteger())
+                {
+                    passOnReg = intCount < intMax;
+                }
+                else if (source.Type == OperandType.V128)
+                {
+                    // Both halves need a register.
+                    passOnReg = intCount + 1 < intMax;
+                }
+                else
+                {
+                    passOnReg = vecCount < vecMax;
+                }
+
+                if (source.Type == OperandType.V128 && passOnReg)
+                {
+                    // V128 is a struct, we pass each half on a GPR if possible.
+                    Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+                    Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+                    nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+                    nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+                    // Both halves are listed among the call sources, which keeps
+                    // this path consistent with the scalar register path below,
+                    // where every argument register is added to the list.
+                    sources.Add(argReg);
+                    sources.Add(argReg2);
+
+                    continue;
+                }
+
+                if (passOnReg)
+                {
+                    Operand argReg = source.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+                    Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+                    HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);
+
+                    sources.Add(argReg);
+                }
+                else
+                {
+                    Operand offset = new Operand(stackOffset);
+
+                    Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
+
+                    HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
+
+                    stackOffset += source.Type.GetSizeInBytes();
+                }
+            }
+
+            if (dest != null)
+            {
+                if (dest.Type == OperandType.V128)
+                {
+                    // The V128 result is rebuilt from the low and high integer
+                    // return registers after the call.
+                    Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+                    Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, retLReg));
+                    node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+                    operation.Destination = null;
+                }
+                else
+                {
+                    Operand retReg = dest.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+                        : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+                    Operation copyOp = new Operation(Instruction.Copy, dest, retReg);
+
+                    node = nodes.AddAfter(node, copyOp);
+
+                    operation.Destination = retReg;
+                }
+            }
+
+            operation.SetSources(sources.ToArray());
+
+            return node;
+        }
+
+        // Replaces a LoadArgument with a copy (or a load, for V128) from a local
+        // that preserves the argument register. The preserving copy is inserted
+        // once per argument at the start of the entry block and cached on
+        // preservedArgs. Arguments that are not on registers are left untouched.
+        private static void HandleLoadArgumentWindowsAbi(
+            CompilerContext cctx,
+            LLNode node,
+            Operand[] preservedArgs,
+            Operation operation)
+        {
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+            // A V128 return value takes the first argument slot, so all the
+            // arguments are shifted by one in that case.
+            int retArgs = cctx.FuncReturnType == OperandType.V128 ? 1 : 0;
+
+            int index = source.AsInt32() + retArgs;
+
+            if (index < CallingConvention.GetArgumentsOnRegsCount())
+            {
+                Operand dest = operation.Destination;
+
+                if (preservedArgs[index] == null)
+                {
+                    Operand argReg, pArg;
+
+                    if (dest.Type.IsInteger())
+                    {
+                        argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type);
+
+                        pArg = Local(dest.Type);
+                    }
+                    else if (dest.Type == OperandType.V128)
+                    {
+                        // The register holds the address of the value, not the value.
+                        argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64);
+
+                        pArg = Local(OperandType.I64);
+                    }
+                    else
+                    {
+                        argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type);
+
+                        pArg = Local(dest.Type);
+                    }
+
+                    Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);
+
+                    cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+                    preservedArgs[index] = pArg;
+                }
+
+                Operation argCopyOp = new Operation(dest.Type == OperandType.V128
+                    ? Instruction.Load
+                    : Instruction.Copy, dest, preservedArgs[index]);
+
+                node.List.AddBefore(node, argCopyOp);
+
+                Delete(node, operation);
+            }
+            else
+            {
+                // TODO: Pass on stack.
// Closes the else-branch and the body of the preceding method, which starts above this section.
            }
        }

        // Lowers a LoadArgument operation for the System V ABI.
        //
        // The argument index is the constant first source of the operation. The registers used by
        // the argument are found by counting the register slots taken by all preceding arguments
        // (from cctx.FuncArgTypes): integers take one integer register, V128 takes two integer
        // registers (one per 64-bit half), anything else takes one vector register.
        //
        // On first use of an argument, a copy from the argument register(s) into a local is inserted
        // at the very start of the entry block and cached in preservedArgs[index]; the LoadArgument
        // itself is then replaced with a copy from that local.
        //
        // Arguments that do not fit on registers are not handled yet (the operation is left as is).
        private static void HandleLoadArgumentSystemVAbi(
            CompilerContext cctx,
            LLNode node,
            Operand[] preservedArgs,
            Operation operation)
        {
            Operand source = operation.GetSource(0);

            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");

            int index = source.AsInt32();

            int intCount = 0;
            int vecCount = 0;

            for (int cIndex = 0; cIndex < index; cIndex++)
            {
                OperandType argType = cctx.FuncArgTypes[cIndex];

                if (argType.IsInteger())
                {
                    intCount++;
                }
                else if (argType == OperandType.V128)
                {
                    // Each half of the vector takes one integer register.
                    intCount += 2;
                }
                else
                {
                    vecCount++;
                }
            }

            Operand dest = operation.Destination;

            // The source operand is only the constant argument index, so its type says nothing about
            // the argument being loaded. The argument class must come from the destination type,
            // which is also what selects the registers below.
            bool passOnReg;

            if (dest.Type.IsInteger())
            {
                passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
            }
            else if (dest.Type == OperandType.V128)
            {
                // Both halves need a register, so the second one must also be in range.
                passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
            }
            else
            {
                passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
            }

            if (passOnReg)
            {
                if (preservedArgs[index] == null)
                {
                    if (dest.Type == OperandType.V128)
                    {
                        // V128 is a struct, we pass each half on a GPR if possible.
                        Operand pArg = Local(OperandType.V128);

                        Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
                        Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);

                        Operation copyL = new Operation(Instruction.VectorCreateScalar, pArg, argLReg);
                        Operation copyH = new Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));

                        // AddFirst prepends, so the high half is added first to end up after the low half.
                        cctx.Cfg.Entry.Operations.AddFirst(copyH);
                        cctx.Cfg.Entry.Operations.AddFirst(copyL);

                        preservedArgs[index] = pArg;
                    }
                    else
                    {
                        Operand pArg = Local(dest.Type);

                        Operand argReg = dest.Type.IsInteger()
                            ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
                            : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);

                        Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);

                        cctx.Cfg.Entry.Operations.AddFirst(copyOp);

                        preservedArgs[index] = pArg;
                    }
                }

                Operation argCopyOp = new Operation(Instruction.Copy, dest, preservedArgs[index]);

                node.List.AddBefore(node, argCopyOp);

                Delete(node, operation);
            }
            else
            {
                // TODO: Pass on stack.
            }
        }

        // Lowers a Return operation for the Windows ABI.
        //
        // Integer values are copied to the integer return register and scalar FP values to the
        // vector return register. V128 values are stored to the address held by the first integer
        // argument register, which is saved into a local at the start of the entry block and cached
        // in preservedArgs[0] (presumably the caller supplied return buffer, confirm with the
        // matching call lowering). The return operation ends up with no sources.
        private static void HandleReturnWindowsAbi(
            CompilerContext cctx,
            LLNode node,
            Operand[] preservedArgs,
            Operation operation)
        {
            if (operation.SourcesCount == 0)
            {
                return;
            }

            Operand source = operation.GetSource(0);

            Operand retReg;

            if (source.Type.IsInteger())
            {
                retReg = Gpr(CallingConvention.GetIntReturnRegister(), source.Type);
            }
            else if (source.Type == OperandType.V128)
            {
                if (preservedArgs[0] == null)
                {
                    Operand preservedArg = Local(OperandType.I64);

                    Operand arg0 = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);

                    Operation copyOp = new Operation(Instruction.Copy, preservedArg, arg0);

                    cctx.Cfg.Entry.Operations.AddFirst(copyOp);

                    preservedArgs[0] = preservedArg;
                }

                retReg = preservedArgs[0];
            }
            else
            {
                retReg = Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
            }

            if (source.Type == OperandType.V128)
            {
                Operation retStoreOp = new Operation(Instruction.Store, null, retReg, source);

                node.List.AddBefore(node, retStoreOp);
            }
            else
            {
                Operation retCopyOp = new Operation(Instruction.Copy, retReg, source);

                node.List.AddBefore(node, retCopyOp);
            }

            operation.SetSources(new Operand[0]);
        }

        // Lowers a Return operation for the System V ABI.
        //
        // V128 values are returned as two 64-bit halves, extracted into the integer return register
        // and the high integer return register. Other values are copied to the integer or vector
        // return register depending on their type. The copies are inserted before the return node.
        private static void HandleReturnSystemVAbi(LLNode node, Operation operation)
        {
            if (operation.SourcesCount == 0)
            {
                return;
            }

            Operand source = operation.GetSource(0);

            if (source.Type == OperandType.V128)
            {
                Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
                Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);

                node.List.AddBefore(node, new Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
                node.List.AddBefore(node, new Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
            }
            else
            {
                Operand retReg = source.Type.IsInteger()
                    ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
                    : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);

                Operation retCopyOp = new Operation(Instruction.Copy, retReg, source);

                node.List.AddBefore(node, retCopyOp);
            }
        }

        // Materializes a floating-point constant on a new local of the same type.
        // The integer form of the constant (see GetIntConst) is first copied to an integer local,
        // which is then moved to the result with VectorCreateScalar. Both operations are inserted
        // before the given node. Returns the new local.
        private static Operand AddXmmCopy(LLNode node, Operand source)
        {
            Operand temp = Local(source.Type);

            Operand intConst = AddCopy(node, GetIntConst(source));

            Operation copyOp = new Operation(Instruction.VectorCreateScalar, temp, intConst);

            node.List.AddBefore(node, copyOp);

            return temp;
        }

        // Inserts a copy of the source operand to a new local of the same type before the given
        // node, and returns that local.
        private static Operand AddCopy(LLNode node, Operand source)
        {
            Operand temp = Local(source.Type);

            Operation copyOp = new Operation(Instruction.Copy, temp, source);

            node.List.AddBefore(node, copyOp);

            return temp;
        }

        // Converts a FP32/FP64 constant into an integer constant built from AsInt32/AsInt64
        // (presumably the raw bits of the value, confirm on Operand). Operands of any other type
        // are returned unchanged.
        private static Operand GetIntConst(Operand value)
        {
            if (value.Type == OperandType.FP32)
            {
                return Const(value.AsInt32());
            }
            else if (value.Type == OperandType.FP64)
            {
                return Const(value.AsInt64());
            }

            return value;
        }

        // Returns true when the value of the constant can not be represented as a signed 32-bits
        // integer. I32 operands are read with AsInt32, everything else with AsInt64.
        private static bool IsLongConst(Operand operand)
        {
            long value = operand.Type == OperandType.I32
                ? operand.AsInt32()
                : operand.AsInt64();

            return !ConstFitsOnS32(value);
        }

        // Returns true when the value survives a round trip through a signed 32-bits integer.
        private static bool ConstFitsOnS32(long value)
        {
            return value == (int)value;
        }

        // Removes the node from its list, after clearing the destination and all the sources of
        // the operation (presumably to drop it from the use/assignment tracking of those operands,
        // confirm on Operation).
        private static void Delete(LLNode node, Operation operation)
        {
            operation.Destination = null;

            for (int index = 0; index < operation.SourcesCount; index++)
            {
                operation.SetSource(index, null);
            }

            node.List.Remove(node);
        }

        // Creates an integer register operand of the given type for the given x86 register.
        private static Operand Gpr(X86Register register, OperandType type)
        {
            return Register((int)register, RegisterType.Integer, type);
        }

        // Creates a vector register operand of the given type for the given x86 register.
        private static Operand Xmm(X86Register register, OperandType type)
        {
            return Register((int)register, RegisterType.Vector, type);
        }

        // Going by the name, tells whether the destination and the first source of the operation
        // must be the same operand. For several instructions the answer depends on VEX encoding
        // support and on the destination being an integer.
        private static bool IsSameOperandDestSrc1(Operation operation)
        {
            switch (operation.Instruction)
            {
                case Instruction.Add:
                case Instruction.Multiply:
                case Instruction.Subtract:
                    return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger();

                case Instruction.BitwiseAnd:
                case Instruction.BitwiseExclusiveOr:
                case Instruction.BitwiseNot:
                case Instruction.BitwiseOr:
                case Instruction.ByteSwap:
                case Instruction.Negate:
                case Instruction.RotateRight:
                case Instruction.ShiftLeft:
                case Instruction.ShiftRightSI:
                case Instruction.ShiftRightUI:
                    return true;

                case Instruction.Divide:
                    return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();

                case Instruction.VectorInsert:
                case Instruction.VectorInsert16:
                case Instruction.VectorInsert8:
                    return !HardwareCapabilities.SupportsVexEncoding;
            }

            return IsVexSameOperandDestSrc1(operation);
        }

        // Same question as IsSameOperandDestSrc1, for intrinsic operations: true only when VEX
        // encoding is not supported, the operation has at least two sources, and it has a V128
        // destination. Always false for operations that are not intrinsics.
        private static bool IsVexSameOperandDestSrc1(Operation operation)
        {
            if (IsIntrinsic(operation.Instruction))
            {
                bool isUnary = operation.SourcesCount < 2;

                bool hasVecDest = operation.Destination != null && operation.Destination.Type == OperandType.V128;

                return !HardwareCapabilities.SupportsVexEncoding && !isUnary && hasVecDest;
            }

            return false;
        }

        // Going by the name, tells whether the instruction can take a constant as its first source.
        private static bool HasConstSrc1(Instruction inst)
        {
            switch (inst)
            {
                case Instruction.Copy:
                case Instruction.LoadArgument:
                case Instruction.Spill:
                case Instruction.SpillArg:
                    return true;
            }

            return false;
        }

        // Going by the name, tells whether the instruction can take a constant as its second source.
        private static bool HasConstSrc2(Instruction inst)
        {
            switch (inst)
            {
                case Instruction.Add:
                case Instruction.BitwiseAnd:
                case Instruction.BitwiseExclusiveOr:
                case Instruction.BitwiseOr:
                case Instruction.CompareEqual:
                case Instruction.CompareGreater:
                case Instruction.CompareGreaterOrEqual:
                case Instruction.CompareGreaterOrEqualUI:
                case Instruction.CompareGreaterUI:
                case Instruction.CompareLess:
                case Instruction.CompareLessOrEqual:
                case Instruction.CompareLessOrEqualUI:
                case Instruction.CompareLessUI:
                case Instruction.CompareNotEqual:
                case Instruction.Multiply:
                case Instruction.RotateRight:
                case Instruction.ShiftLeft:
                case Instruction.ShiftRightSI:
                case Instruction.ShiftRightUI:
                case Instruction.Subtract:
                case Instruction.VectorExtract:
                case Instruction.VectorExtract16:
                case Instruction.VectorExtract8:
                    return true;
            }

            return false;
        }

        // Tells whether the two sources of the instruction can be swapped without changing the result.
        private static bool IsCommutative(Instruction inst)
        {
            switch (inst)
            {
                case Instruction.Add:
                case Instruction.BitwiseAnd:
                case Instruction.BitwiseExclusiveOr:
                case Instruction.BitwiseOr:
                case Instruction.CompareEqual:
                case Instruction.CompareNotEqual:
                case Instruction.Multiply:
                    return true;
            }

            return false;
        }

        // Intrinsic operations are the ones using the Extended instruction.
        private static bool IsIntrinsic(Instruction inst)
        {
            return inst == Instruction.Extended;
        }
    }
}
// ARMeilleure/CodeGen/X86/X86Condition.cs (new file added by this commit)
namespace ARMeilleure.CodeGen.X86
{
    // Conditions that can be tested by the x86 code generator.
    // Each line holds a condition (even value) followed by its opposite (odd value).
    // NOTE(review): the values look like the 4-bit x86 condition code encoding used by
    // Jcc/SETcc/CMOVcc; confirm against the assembler before relying on it.
    enum X86Condition
    {
        Overflow     = 0x0, NotOverflow    = 0x1,
        Below        = 0x2, AboveOrEqual   = 0x3,
        Equal        = 0x4, NotEqual       = 0x5,
        BelowOrEqual = 0x6, Above          = 0x7,
        Sign         = 0x8, NotSign        = 0x9,
        ParityEven   = 0xa, ParityOdd      = 0xb,
        Less         = 0xc, GreaterOrEqual = 0xd,
        LessOrEqual  = 0xe, Greater        = 0xf
    }
}
// ARMeilleure/CodeGen/X86/X86Instruction.cs (new file added by this commit)
namespace ARMeilleure.CodeGen.X86
{
    // Identifiers of the x86 instructions known to the code generator.
    // Values are assigned implicitly from zero, so the declaration order is significant:
    // do not reorder members, and keep Count last (it equals the number of instructions).
    enum X86Instruction
    {
        Add, Addpd, Addps, Addsd, Addss, And, Andnpd, Andnps,
        Bsr, Bswap,
        Call, Cmovcc, Cmp, Cmppd, Cmpps, Cmpsd, Cmpss, Cmpxchg16b, Comisd, Comiss, Cpuid,
        Cvtdq2pd, Cvtdq2ps, Cvtpd2dq, Cvtpd2ps, Cvtps2dq, Cvtps2pd,
        Cvtsd2si, Cvtsd2ss, Cvtsi2sd, Cvtsi2ss, Cvtss2sd,
        Div, Divpd, Divps, Divsd, Divss,
        Haddpd, Haddps,
        Idiv, Imul, Imul128, Insertps,
        Lea,
        Maxpd, Maxps, Maxsd, Maxss, Minpd, Minps, Minsd, Minss,
        Mov, Mov16, Mov8, Movd, Movdqu, Movhlps, Movlhps, Movq, Movsd, Movss,
        Movsx16, Movsx32, Movsx8, Movzx16, Movzx8,
        Mul128, Mulpd, Mulps, Mulsd, Mulss,
        Neg, Not,
        Or,
        Paddb, Paddd, Paddq, Paddw, Pand, Pandn, Pavgb, Pavgw, Pblendvb,
        Pcmpeqb, Pcmpeqd, Pcmpeqq, Pcmpeqw, Pcmpgtb, Pcmpgtd, Pcmpgtq, Pcmpgtw,
        Pextrb, Pextrd, Pextrq, Pextrw, Pinsrb, Pinsrd, Pinsrq, Pinsrw,
        Pmaxsb, Pmaxsd, Pmaxsw, Pmaxub, Pmaxud, Pmaxuw,
        Pminsb, Pminsd, Pminsw, Pminub, Pminud, Pminuw,
        Pmovsxbw, Pmovsxdq, Pmovsxwd, Pmovzxbw, Pmovzxdq, Pmovzxwd,
        Pmulld, Pmullw, Pop, Popcnt, Por, Pshufb, Pshufd,
        Pslld, Pslldq, Psllq, Psllw, Psrad, Psraw, Psrld, Psrlq, Psrldq, Psrlw,
        Psubb, Psubd, Psubq, Psubw,
        Punpckhbw, Punpckhdq, Punpckhqdq, Punpckhwd,
        Punpcklbw, Punpckldq, Punpcklqdq, Punpcklwd,
        Push, Pxor,
        Rcpps, Rcpss, Ror, Roundpd, Roundps, Roundsd, Roundss, Rsqrtps, Rsqrtss,
        Sar, Setcc, Shl, Shr, Shufpd, Shufps, Sqrtpd, Sqrtps, Sqrtsd, Sqrtss,
        Sub, Subpd, Subps, Subsd, Subss,
        Test,
        Unpckhpd, Unpckhps, Unpcklpd, Unpcklps,
        Vpblendvb,
        Xor, Xorpd, Xorps,

        Count
    }
}
// ARMeilleure/CodeGen/X86/X86Register.cs (new file added by this commit)
namespace ARMeilleure.CodeGen.X86
{
    // Register numbers for the x86 integer and vector register files.
    // Both files share the 0-15 range, so an integer register and a vector register with the
    // same number have the same enum value; the value alone does not identify the file.
    enum X86Register
    {
        Invalid = -1,

        // Integer registers.
        Rax = 0,  Rcx = 1,  Rdx = 2,  Rbx = 3,
        Rsp = 4,  Rbp = 5,  Rsi = 6,  Rdi = 7,
        R8  = 8,  R9  = 9,  R10 = 10, R11 = 11,
        R12 = 12, R13 = 13, R14 = 14, R15 = 15,

        // Vector registers.
        Xmm0  = 0,  Xmm1  = 1,  Xmm2  = 2,  Xmm3  = 3,
        Xmm4  = 4,  Xmm5  = 5,  Xmm6  = 6,  Xmm7  = 7,
        Xmm8  = 8,  Xmm9  = 9,  Xmm10 = 10, Xmm11 = 11,
        Xmm12 = 12, Xmm13 = 13, Xmm14 = 14, Xmm15 = 15
    }
}
\ No newline at end of file |