aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/CodeGen
diff options
context:
space:
mode:
authorgdkchan <gab.dark.100@gmail.com>2019-08-08 15:56:22 -0300
committeremmauss <emmausssss@gmail.com>2019-08-08 21:56:22 +0300
commita731ab3a2aad56e6ceb8b4e2444a61353246295c (patch)
treec7f13f51bfec6b19431e62167811ae31e9d2fea9 /ARMeilleure/CodeGen
parent1ba58e9942e54175e3f3a0e1d57a48537f4888b1 (diff)
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project * Refactoring around the old IRAdapter, now renamed to PreAllocator * Optimize the LowestBitSet method * Add CLZ support and fix CLS implementation * Add missing Equals and GetHashCode overrides on some structs, misc small tweaks * Implement the ByteSwap IR instruction, and some refactoring on the assembler * Implement the DivideUI IR instruction and fix 64-bits IDIV * Correct constant operand type on CSINC * Move division instructions implementation to InstEmitDiv * Fix destination type for the ConditionalSelect IR instruction * Implement UMULH and SMULH, with new IR instructions * Fix some issues with shift instructions * Fix constant types for BFM instructions * Fix up new tests using the new V128 struct * Update tests * Move DIV tests to a separate file * Add support for calls, and some instructions that depends on them * Start adding support for SIMD & FP types, along with some of the related ARM instructions * Fix some typos and the divide instruction with FP operands * Fix wrong method call on Clz_V * Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes * Implement SIMD logical instructions and more misc. fixes * Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations * Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. 
fixes * Implement SIMD shift instruction and fix Dup_V * Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table * Fix check with tolerance on tester * Implement FP & SIMD comparison instructions, and some fixes * Update FCVT (Scalar) encoding on the table to support the Half-float variants * Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes * Use old memory access methods, made a start on SIMD memory insts support, some fixes * Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes * Fix arguments count with struct return values, other fixes * More instructions * Misc. fixes and integrate LDj3SNuD fixes * Update tests * Add a faster linear scan allocator, unwinding support on windows, and other changes * Update Ryujinx.HLE * Update Ryujinx.Graphics * Fix V128 return pointer passing, RCX is clobbered * Update Ryujinx.Tests * Update ITimeZoneService * Stop using GetFunctionPointer as that can't be called from native code, misc. 
fixes and tweaks * Use generic GetFunctionPointerForDelegate method and other tweaks * Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics * Remove some unused code on the assembler * Fix REX.W prefix regression on float conversion instructions, add some sort of profiler * Add hardware capability detection * Fix regression on Sha1h and revert Fcm** changes * Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator * Fix silly mistake introduced on last commit on CpuId * Generate inline stack probes when the stack allocation is too large * Initial support for the System-V ABI * Support multiple destination operands * Fix SSE2 VectorInsert8 path, and other fixes * Change placement of XMM callee save and restore code to match other compilers * Rename Dest to Destination and Inst to Instruction * Fix a regression related to calls and the V128 type * Add an extra space on comments to match code style * Some refactoring * Fix vector insert FP32 SSE2 path * Port over the ARM32 instructions * Avoid memory protection races on JIT Cache * Another fix on VectorInsert FP32 (thanks to LDj3SNuD * Float operands don't need to use the same register when VEX is supported * Add a new register allocator, higher quality code for hot code (tier up), and other tweaks * Some nits, small improvements on the pre allocator * CpuThreadState is gone * Allow changing CPU emulators with a config entry * Add runtime identifiers on the ARMeilleure project * Allow switching between CPUs through a config entry (pt. 
2) * Change win10-x64 to win-x64 on projects * Update the Ryujinx project to use ARMeilleure * Ensure that the selected register is valid on the hybrid allocator * Allow exiting on returns to 0 (should fix test regression) * Remove register assignments for most used variables on the hybrid allocator * Do not use fixed registers as spill temp * Add missing namespace and remove unneeded using * Address PR feedback * Fix types, etc * Enable AssumeStrictAbiCompliance by default * Ensure that Spill and Fill don't load or store any more than necessary
Diffstat (limited to 'ARMeilleure/CodeGen')
-rw-r--r--ARMeilleure/CodeGen/CompiledFunction.cs17
-rw-r--r--ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs258
-rw-r--r--ARMeilleure/CodeGen/Optimizations/Optimizer.cs126
-rw-r--r--ARMeilleure/CodeGen/Optimizations/Simplification.cs157
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs19
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs246
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs382
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs12
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs1019
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs390
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs31
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs47
-rw-r--r--ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs27
-rw-r--r--ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs18
-rw-r--r--ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs20
-rw-r--r--ARMeilleure/CodeGen/X86/Assembler.cs1358
-rw-r--r--ARMeilleure/CodeGen/X86/CallConvName.cs8
-rw-r--r--ARMeilleure/CodeGen/X86/CallingConvention.cs159
-rw-r--r--ARMeilleure/CodeGen/X86/CodeGenContext.cs305
-rw-r--r--ARMeilleure/CodeGen/X86/CodeGenerator.cs1661
-rw-r--r--ARMeilleure/CodeGen/X86/HardwareCapabilities.cs52
-rw-r--r--ARMeilleure/CodeGen/X86/IntrinsicInfo.cs14
-rw-r--r--ARMeilleure/CodeGen/X86/IntrinsicTable.cs160
-rw-r--r--ARMeilleure/CodeGen/X86/IntrinsicType.cs14
-rw-r--r--ARMeilleure/CodeGen/X86/PreAllocator.cs1280
-rw-r--r--ARMeilleure/CodeGen/X86/X86Condition.cs22
-rw-r--r--ARMeilleure/CodeGen/X86/X86Instruction.cs190
-rw-r--r--ARMeilleure/CodeGen/X86/X86Register.cs41
28 files changed, 8033 insertions, 0 deletions
diff --git a/ARMeilleure/CodeGen/CompiledFunction.cs b/ARMeilleure/CodeGen/CompiledFunction.cs
new file mode 100644
index 00000000..61e89c24
--- /dev/null
+++ b/ARMeilleure/CodeGen/CompiledFunction.cs
@@ -0,0 +1,17 @@
+using ARMeilleure.CodeGen.Unwinding;
+
+namespace ARMeilleure.CodeGen
+{
+ // Immutable pair made of the code bytes of a function and the
+ // unwind information that goes with them.
+ struct CompiledFunction
+ {
+     private readonly byte[] _code;
+     private readonly UnwindInfo _unwindInfo;
+
+     // Code bytes, exactly as passed to the constructor.
+     public byte[] Code => _code;
+
+     // Unwind information, exactly as passed to the constructor.
+     public UnwindInfo UnwindInfo => _unwindInfo;
+
+     public CompiledFunction(byte[] code, UnwindInfo unwindInfo)
+     {
+         _code = code;
+         _unwindInfo = unwindInfo;
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
new file mode 100644
index 00000000..84eedee0
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
@@ -0,0 +1,258 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class ConstantFolding
+ {
+ // Replaces an operation whose sources are all constants with a copy of the
+ // pre-computed result. Only I32 and I64 destinations are folded; operations
+ // without a destination, without sources, with a non-constant source, or with
+ // an instruction not listed below are left untouched.
+ public static void RunPass(Operation operation)
+ {
+     if (operation.Destination == null || operation.SourcesCount == 0)
+     {
+         return;
+     }
+
+     if (!AreAllSourcesConstant(operation))
+     {
+         return;
+     }
+
+     OperandType type = operation.Destination.Type;
+
+     switch (operation.Instruction)
+     {
+         case Instruction.Add:
+             EvaluateBinary(operation, type, (x, y) => x + y, (x, y) => x + y);
+             break;
+
+         case Instruction.BitwiseAnd:
+             EvaluateBinary(operation, type, (x, y) => x & y, (x, y) => x & y);
+             break;
+
+         case Instruction.BitwiseExclusiveOr:
+             EvaluateBinary(operation, type, (x, y) => x ^ y, (x, y) => x ^ y);
+             break;
+
+         case Instruction.BitwiseNot:
+             EvaluateUnary(operation, type, (x) => ~x, (x) => ~x);
+             break;
+
+         case Instruction.BitwiseOr:
+             EvaluateBinary(operation, type, (x, y) => x | y, (x, y) => x | y);
+             break;
+
+         case Instruction.Copy:
+             EvaluateUnary(operation, type, (x) => x, (x) => x);
+             break;
+
+         case Instruction.Divide:
+             // A zero divisor folds to zero. A divisor of -1 is folded with a wrapping
+             // negation instead of "x / y": the managed division throws for
+             // MinValue / -1, which would abort the whole pass, while the negation
+             // gives the same result for every other dividend and wraps for MinValue.
+             EvaluateBinary(
+                 operation,
+                 type,
+                 (x, y) => y == 0 ? 0 : (y == -1 ? unchecked(-x) : x / y),
+                 (x, y) => y == 0 ? 0 : (y == -1 ? unchecked(-x) : x / y));
+             break;
+
+         case Instruction.DivideUI:
+             // Unsigned division can not overflow, only the zero divisor needs a guard.
+             EvaluateBinary(
+                 operation,
+                 type,
+                 (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0,
+                 (x, y) => y != 0 ? (long)((ulong)x / (ulong)y) : 0);
+             break;
+
+         case Instruction.Multiply:
+             EvaluateBinary(operation, type, (x, y) => x * y, (x, y) => x * y);
+             break;
+
+         case Instruction.Negate:
+             EvaluateUnary(operation, type, (x) => -x, (x) => -x);
+             break;
+
+         case Instruction.ShiftLeft:
+             EvaluateBinary(operation, type, (x, y) => x << y, (x, y) => x << (int)y);
+             break;
+
+         case Instruction.ShiftRightSI:
+             EvaluateBinary(operation, type, (x, y) => x >> y, (x, y) => x >> (int)y);
+             break;
+
+         case Instruction.ShiftRightUI:
+             EvaluateBinary(
+                 operation,
+                 type,
+                 (x, y) => (int)((uint)x >> y),
+                 (x, y) => (long)((ulong)x >> (int)y));
+             break;
+
+         case Instruction.SignExtend16:
+             EvaluateUnary(operation, type, (x) => (short)x, (x) => (short)x);
+             break;
+
+         case Instruction.SignExtend32:
+             EvaluateUnary(operation, type, (x) => x, (x) => (int)x);
+             break;
+
+         case Instruction.SignExtend8:
+             EvaluateUnary(operation, type, (x) => (sbyte)x, (x) => (sbyte)x);
+             break;
+
+         case Instruction.Subtract:
+             EvaluateBinary(operation, type, (x, y) => x - y, (x, y) => x - y);
+             break;
+     }
+ }
+
+ // Folds a one-source operation with the evaluator matching the destination type.
+ // Types other than I32 and I64 are not folded.
+ private static void EvaluateUnary(
+     Operation operation,
+     OperandType type,
+     Func<int, int> opI32,
+     Func<long, long> opI64)
+ {
+     if (type == OperandType.I32)
+     {
+         EvaluateUnaryI32(operation, opI32);
+     }
+     else if (type == OperandType.I64)
+     {
+         EvaluateUnaryI64(operation, opI64);
+     }
+ }
+
+ // Folds a two-source operation with the evaluator matching the destination type.
+ // Types other than I32 and I64 are not folded.
+ private static void EvaluateBinary(
+     Operation operation,
+     OperandType type,
+     Func<int, int, int> opI32,
+     Func<long, long, long> opI64)
+ {
+     if (type == OperandType.I32)
+     {
+         EvaluateBinaryI32(operation, opI32);
+     }
+     else if (type == OperandType.I64)
+     {
+         EvaluateBinaryI64(operation, opI64);
+     }
+ }
+
+ // Tells whether every source operand of the operation is a constant.
+ private static bool AreAllSourcesConstant(Operation operation)
+ {
+     int srcIndex = 0;
+
+     while (srcIndex < operation.SourcesCount)
+     {
+         Operand source = operation.GetSource(srcIndex);
+
+         // A single non-constant source is enough to give up.
+         if (source.Kind != OperandKind.Constant)
+         {
+             return false;
+         }
+
+         srcIndex++;
+     }
+
+     return true;
+ }
+
+ // Applies "op" to the first source read as a 32-bit integer, then turns the
+ // operation into a copy of the resulting constant.
+ private static void EvaluateUnaryI32(Operation operation, Func<int, int> op)
+ {
+     int result = op(operation.GetSource(0).AsInt32());
+
+     operation.TurnIntoCopy(Const(result));
+ }
+
+ // Applies "op" to the first source read as a 64-bit integer, then turns the
+ // operation into a copy of the resulting constant.
+ private static void EvaluateUnaryI64(Operation operation, Func<long, long> op)
+ {
+     long result = op(operation.GetSource(0).AsInt64());
+
+     operation.TurnIntoCopy(Const(result));
+ }
+
+ // Applies "op" to the first two sources read as 32-bit integers, then turns the
+ // operation into a copy of the resulting constant.
+ private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op)
+ {
+     int lhs = operation.GetSource(0).AsInt32();
+     int rhs = operation.GetSource(1).AsInt32();
+
+     int result = op(lhs, rhs);
+
+     operation.TurnIntoCopy(Const(result));
+ }
+
+ // Applies "op" to the first two sources read as 64-bit integers, then turns the
+ // operation into a copy of the resulting constant.
+ private static void EvaluateBinaryI64(Operation operation, Func<long, long, long> op)
+ {
+     long lhs = operation.GetSource(0).AsInt64();
+     long rhs = operation.GetSource(1).AsInt64();
+
+     long result = op(lhs, rhs);
+
+     operation.TurnIntoCopy(Const(result));
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
new file mode 100644
index 00000000..c01a8f1e
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -0,0 +1,126 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Optimizer
+ {
+ // Runs unused node removal, constant folding, simplification and copy
+ // propagation over every block, repeating the whole sweep until one of
+ // them completes without removing any node.
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+     bool modified;
+
+     do
+     {
+         modified = false;
+
+         foreach (BasicBlock block in cfg.Blocks)
+         {
+             LinkedListNode<Node> current = block.Operations.First;
+
+             while (current != null)
+             {
+                 // Grab the successor first, "current" may be unlinked below.
+                 LinkedListNode<Node> following = current.Next;
+
+                 if (IsUnused(current.Value))
+                 {
+                     RemoveNode(block, current);
+
+                     modified = true;
+                 }
+                 else if (current.Value is Operation operation)
+                 {
+                     ConstantFolding.RunPass(operation);
+
+                     Simplification.RunPass(operation);
+
+                     // Both passes above may have turned the operation into a copy,
+                     // so the copy check has to come after them.
+                     if (DestIsLocalVar(operation) && IsPropagableCopy(operation))
+                     {
+                         PropagateCopy(operation);
+
+                         RemoveNode(block, current);
+
+                         modified = true;
+                     }
+                 }
+
+                 current = following;
+             }
+         }
+     }
+     while (modified);
+ }
+
+ // Rewrites every node that reads the destination of "copyOp" so that it reads
+ // the copy source directly instead.
+ private static void PropagateCopy(Operation copyOp)
+ {
+     Operand replaced = copyOp.Destination;
+     Operand replacement = copyOp.GetSource(0);
+
+     // Iterate over a snapshot of the use list rather than the list itself
+     // (presumably because SetSource updates the use lists while we walk them).
+     foreach (Node user in replaced.Uses.ToArray())
+     {
+         int srcIndex = 0;
+
+         while (srcIndex < user.SourcesCount)
+         {
+             if (user.GetSource(srcIndex) == replaced)
+             {
+                 user.SetSource(srcIndex, replacement);
+             }
+
+             srcIndex++;
+         }
+     }
+ }
+
+ // Unlinks a node from the operation list of its block, and also takes it out
+ // of the use lists of the operands it was reading.
+ private static void RemoveNode(BasicBlock block, LinkedListNode<Node> llNode)
+ {
+     block.Operations.Remove(llNode);
+
+     Node removed = llNode.Value;
+
+     // Clearing the sources is what detaches the node from its operands.
+     int srcIndex = 0;
+
+     while (srcIndex < removed.SourcesCount)
+     {
+         removed.SetSource(srcIndex, null);
+
+         srcIndex++;
+     }
+
+     // Nothing may still be reading the value produced by the removed node.
+     Debug.Assert(removed.Destination == null || removed.Destination.Uses.Count == 0);
+
+     removed.Destination = null;
+ }
+
+ // A node is unused when it writes a local variable that nothing reads,
+ // and it has no side effects that must be preserved.
+ private static bool IsUnused(Node node)
+ {
+     if (!DestIsLocalVar(node))
+     {
+         return false;
+     }
+
+     if (node.Destination.Uses.Count != 0)
+     {
+         return false;
+     }
+
+     return !HasSideEffects(node);
+ }
+
+ // Tells whether the node has a destination, and that destination is a local variable.
+ private static bool DestIsLocalVar(Node node)
+ {
+     if (node.Destination == null)
+     {
+         return false;
+     }
+
+     return node.Destination.Kind == OperandKind.LocalVariable;
+ }
+
+ // Only call operations are treated as having side effects.
+ private static bool HasSideEffects(Node node)
+ {
+     Operation operation = node as Operation;
+
+     return operation != null && operation.Instruction == Instruction.Call;
+ }
+
+ // A copy can be propagated when its source and destination have the same type.
+ private static bool IsPropagableCopy(Operation operation)
+ {
+     return operation.Instruction == Instruction.Copy &&
+            operation.Destination.Type == operation.GetSource(0).Type;
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/ARMeilleure/CodeGen/Optimizations/Simplification.cs
new file mode 100644
index 00000000..cafc025c
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/Simplification.cs
@@ -0,0 +1,157 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Simplification
+ {
+ // Tries to replace the operation with a plain copy when one of its operands
+ // is a constant that makes the computation trivial.
+ public static void RunPass(Operation operation)
+ {
+     switch (operation.Instruction)
+     {
+         // x + 0, 0 + x, x ^ 0 and 0 ^ x.
+         case Instruction.BitwiseExclusiveOr:
+         case Instruction.Add:
+             TryEliminateBinaryOpComutative(operation, 0);
+             break;
+
+         // x * 1 and 1 * x.
+         case Instruction.Multiply:
+             TryEliminateBinaryOpComutative(operation, 1);
+             break;
+
+         // x / 1.
+         case Instruction.Divide:
+             TryEliminateBinaryOpY(operation, 1);
+             break;
+
+         // x - 0, and shifts by 0.
+         case Instruction.Subtract:
+         case Instruction.ShiftLeft:
+         case Instruction.ShiftRightSI:
+         case Instruction.ShiftRightUI:
+             TryEliminateBinaryOpY(operation, 0);
+             break;
+
+         case Instruction.BitwiseAnd:
+             TryEliminateBitwiseAnd(operation);
+             break;
+
+         case Instruction.BitwiseOr:
+             TryEliminateBitwiseOr(operation);
+             break;
+
+         case Instruction.ConditionalSelect:
+             TryEliminateConditionalSelect(operation);
+             break;
+     }
+ }
+
+ // Turns a bitwise AND into a copy when one operand is a constant with all
+ // bits set (the result is the other operand) or a constant zero (the result
+ // is zero).
+ private static void TryEliminateBitwiseAnd(Operation operation)
+ {
+     // Try to recognize and optimize those 4 patterns (in order):
+     // 0xFFFFFFFF & y == y, x & 0xFFFFFFFF == x,
+     // 0x00000000 & y == 0x00000000, x & 0x00000000 == 0x00000000
+     Operand x = operation.GetSource(0);
+     Operand y = operation.GetSource(1);
+
+     if (IsConstEqual(x, AllOnes(x.Type)))
+     {
+         operation.TurnIntoCopy(y);
+     }
+     else if (IsConstEqual(y, AllOnes(y.Type)))
+     {
+         operation.TurnIntoCopy(x);
+     }
+     else if (IsConstEqual(x, 0))
+     {
+         // Reuse the zero operand itself so that the copied constant keeps the
+         // operand type, rather than building a new constant from an int literal.
+         operation.TurnIntoCopy(x);
+     }
+     else if (IsConstEqual(y, 0))
+     {
+         operation.TurnIntoCopy(y);
+     }
+ }
+
+ // Turns a bitwise OR into a copy when one operand is a constant zero (the
+ // result is the other operand) or a constant with all bits set (the result
+ // has all bits set).
+ private static void TryEliminateBitwiseOr(Operation operation)
+ {
+     // Try to recognize and optimize those 4 patterns (in order):
+     // 0x00000000 | y == y, x | 0x00000000 == x,
+     // 0xFFFFFFFF | y == 0xFFFFFFFF, x | 0xFFFFFFFF == 0xFFFFFFFF
+     Operand x = operation.GetSource(0);
+     Operand y = operation.GetSource(1);
+
+     if (IsConstEqual(x, 0))
+     {
+         operation.TurnIntoCopy(y);
+     }
+     else if (IsConstEqual(y, 0))
+     {
+         operation.TurnIntoCopy(x);
+     }
+     else if (IsConstEqual(x, AllOnes(x.Type)))
+     {
+         // Reuse the all ones operand itself so that the copied constant keeps the
+         // operand type, rather than building a new constant from an ulong value.
+         operation.TurnIntoCopy(x);
+     }
+     else if (IsConstEqual(y, AllOnes(y.Type)))
+     {
+         operation.TurnIntoCopy(y);
+     }
+ }
+
+ // Turns the operation into a copy of its first source when the second
+ // source is an integer constant equal to "comparand".
+ private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
+ {
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     if (!IsConstEqual(rhs, comparand))
+     {
+         return;
+     }
+
+     operation.TurnIntoCopy(lhs);
+ }
+
+ // Turns the operation into a copy of one source when the other source is an
+ // integer constant equal to "comparand". The first source is checked first.
+ private static void TryEliminateBinaryOpComutative(Operation operation, ulong comparand)
+ {
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     if (IsConstEqual(lhs, comparand))
+     {
+         operation.TurnIntoCopy(rhs);
+
+         return;
+     }
+
+     if (IsConstEqual(rhs, comparand))
+     {
+         operation.TurnIntoCopy(lhs);
+     }
+ }
+
+ // Turns a conditional select with a constant condition into a copy of the
+ // source that the condition picks.
+ private static void TryEliminateConditionalSelect(Operation operation)
+ {
+     Operand condition = operation.GetSource(0);
+
+     if (condition.Kind == OperandKind.Constant)
+     {
+         // Source 1 is picked by a non-zero condition, source 2 by a zero one.
+         Operand selected = condition.Value != 0
+             ? operation.GetSource(1)
+             : operation.GetSource(2);
+
+         operation.TurnIntoCopy(selected);
+     }
+ }
+
+ // Tells whether the operand is an integer constant whose value is "comparand".
+ private static bool IsConstEqual(Operand operand, ulong comparand)
+ {
+     return operand.Kind == OperandKind.Constant &&
+            operand.Type.IsInteger() &&
+            operand.Value == comparand;
+ }
+
+ // Returns the value with all bits set for the given integer type
+ // (32 bits set for I32, 64 bits set for I64). Any other type is rejected.
+ private static ulong AllOnes(OperandType type)
+ {
+     if (type == OperandType.I32)
+     {
+         return ~0U;
+     }
+
+     if (type == OperandType.I64)
+     {
+         return ~0UL;
+     }
+
+     throw new ArgumentException("Invalid operand type \"" + type + "\".");
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
new file mode 100644
index 00000000..94ac6991
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Immutable set of three values describing the outcome of a register
+ // allocation: the used integer registers, the used vector registers
+ // and the size of the spill region.
+ struct AllocationResult
+ {
+     private readonly int _intUsedRegisters;
+     private readonly int _vecUsedRegisters;
+     private readonly int _spillRegionSize;
+
+     public int IntUsedRegisters => _intUsedRegisters;
+     public int VecUsedRegisters => _vecUsedRegisters;
+     public int SpillRegionSize => _spillRegionSize;
+
+     public AllocationResult(
+         int intUsedRegisters,
+         int vecUsedRegisters,
+         int spillRegionSize)
+     {
+         _intUsedRegisters = intUsedRegisters;
+         _vecUsedRegisters = vecUsedRegisters;
+         _spillRegionSize = spillRegionSize;
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
new file mode 100644
index 00000000..65901e80
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
@@ -0,0 +1,246 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class CopyResolver
+ {
+ private class ParallelCopy
+ {
+ // One pending register to register move, along with the type of the moved value.
+ private struct Copy
+ {
+     private readonly Register _dest;
+     private readonly Register _source;
+     private readonly OperandType _type;
+
+     public Register Dest => _dest;
+     public Register Source => _source;
+
+     public OperandType Type => _type;
+
+     public Copy(Register dest, Register source, OperandType type)
+     {
+         _dest = dest;
+         _source = source;
+         _type = type;
+     }
+ }
+
+ private List<Copy> _copies;
+
+ public int Count => _copies.Count;
+
+ // Starts with an empty set of copies.
+ public ParallelCopy() => _copies = new List<Copy>();
+
+ // Records a move of a value of the given type from "source" into "dest".
+ public void AddCopy(Register dest, Register source, OperandType type)
+ {
+     Copy entry = new Copy(dest, source, type);
+
+     _copies.Add(entry);
+ }
+
+ // Appends to "sequence" the operations that carry out all the recorded copies.
+ // Destinations that are not the source of any copy are written with a plain
+ // copy; destinations that can not be freed that way (such as registers that
+ // exchange their values with each other) are handled with a XOR swap.
+ public void Sequence(List<Operation> sequence)
+ {
+ // Maps a source register to the register that currently holds its value.
+ Dictionary<Register, Register> locations = new Dictionary<Register, Register>();
+ // Maps a destination register to the source register it has to receive.
+ Dictionary<Register, Register> sources = new Dictionary<Register, Register>();
+
+ // Maps a destination register to the type of the value it receives.
+ Dictionary<Register, OperandType> types = new Dictionary<Register, OperandType>();
+
+ // All destinations, in insertion order.
+ Queue<Register> pendingQueue = new Queue<Register>();
+ // Destinations that can be overwritten right away.
+ Queue<Register> readyQueue = new Queue<Register>();
+
+ foreach (Copy copy in _copies)
+ {
+ // At the start every source value lives on its own register.
+ locations[copy.Source] = copy.Source;
+ sources[copy.Dest] = copy.Source;
+ types[copy.Dest] = copy.Type;
+
+ pendingQueue.Enqueue(copy.Dest);
+ }
+
+ foreach (Copy copy in _copies)
+ {
+ // If the destination is not used anywhere, we can assign it immediately.
+ if (!locations.ContainsKey(copy.Dest))
+ {
+ readyQueue.Enqueue(copy.Dest);
+ }
+ }
+
+ while (pendingQueue.TryDequeue(out Register current))
+ {
+ Register copyDest;
+ Register origSource;
+ Register copySource;
+
+ // Drain every destination that is currently safe to overwrite.
+ while (readyQueue.TryDequeue(out copyDest))
+ {
+ origSource = sources[copyDest];
+ copySource = locations[origSource];
+
+ OperandType type = types[copyDest];
+
+ EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+ // The value of the original source is now also available on the destination.
+ locations[origSource] = copyDest;
+
+ // The value was still on its original register, and it has been copied
+ // elsewhere. If that register is itself a destination, it is now ready.
+ if (origSource == copySource && sources.ContainsKey(origSource))
+ {
+ readyQueue.Enqueue(origSource);
+ }
+ }
+
+ copyDest = current;
+ origSource = sources[copyDest];
+ copySource = locations[origSource];
+
+ // Nothing to do when the destination already holds the value it needs.
+ if (copyDest != copySource)
+ {
+ OperandType type = types[copyDest];
+
+ // The swap is always done with I64 or V128 operands
+ // (presumably so that the full register contents are exchanged).
+ type = type.IsInteger() ? OperandType.I64 : OperandType.V128;
+
+ EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+ locations[origSource] = copyDest;
+
+ Register swapOther = copySource;
+
+ if (copyDest != locations[sources[copySource]])
+ {
+ // Find the other swap destination register.
+ // To do that, we search all the pending registers, and pick
+ // the one where the copy source register is equal to the
+ // current destination register being processed (copyDest).
+ foreach (Register pending in pendingQueue)
+ {
+ // Is this a copy of pending <- copyDest?
+ if (copyDest == locations[sources[pending]])
+ {
+ swapOther = pending;
+
+ break;
+ }
+ }
+ }
+
+ // The value that was previously at "copyDest" now lives on
+ // "copySource" thanks to the swap, now we need to update the
+ // location for the next copy that is supposed to copy the value
+ // that used to live on "copyDest".
+ locations[sources[swapOther]] = copySource;
+ }
+ }
+ }
+
+ // Appends a copy operation with "x" as destination and "y" as source.
+ private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
+ {
+     Operation copyOp = new Operation(Instruction.Copy, x, y);
+
+     sequence.Add(copyOp);
+ }
+
+ // Appends the three exclusive or operations that exchange the values
+ // of "x" and "y" without needing a temporary.
+ private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
+ {
+     const Instruction Xor = Instruction.BitwiseExclusiveOr;
+
+     Operation first = new Operation(Xor, x, x, y);  // x = x ^ y
+     Operation second = new Operation(Xor, y, y, x); // y = y ^ x
+     Operation third = new Operation(Xor, x, x, y);  // x = x ^ y
+
+     sequence.Add(first);
+     sequence.Add(second);
+     sequence.Add(third);
+ }
+ }
+
+ private Queue<Operation> _fillQueue = new Queue<Operation>();
+ private Queue<Operation> _spillQueue = new Queue<Operation>();
+
+ private ParallelCopy _parallelCopy;
+
+ public bool HasCopy { get; private set; }
+
+ // Creates a resolver with no fills, spills or copies recorded.
+ public CopyResolver()
+ {
+     // The fill and spill queues are already created by their field initializers,
+     // so only the parallel copy set has to be created here.
+     _parallelCopy = new ParallelCopy();
+ }
+
+ // Records the move needed to carry the value of a variable from the location
+ // of the "left" interval to the location of the "right" interval.
+ // Throws ArgumentException when the intervals belong to different variables,
+ // or when the move would have to go from one stack slot to another.
+ public void AddSplit(LiveInterval left, LiveInterval right)
+ {
+     if (left.Local != right.Local)
+     {
+         throw new ArgumentException("Intervals of different variables are not allowed.");
+     }
+
+     OperandType type = left.Local.Type;
+
+     bool leftSpilled = left.IsSpilled;
+     bool rightSpilled = right.IsSpilled;
+
+     if (leftSpilled != rightSpilled)
+     {
+         if (leftSpilled)
+         {
+             // Stack on the left, register on the right: load the value.
+             AddSplitFill(left, right, type);
+         }
+         else
+         {
+             // Register on the left, stack on the right: store the value.
+             AddSplitSpill(left, right, type);
+         }
+     }
+     else if (!leftSpilled && left.Register != right.Register)
+     {
+         // Both sides live on registers, and those registers differ.
+         AddSplitCopy(left, right, type);
+     }
+     else if (left.SpillOffset != right.SpillOffset)
+     {
+         // Moving between two different stack slots is not supported.
+         throw new ArgumentException("Both intervals were spilled.");
+     }
+ }
+
+ // Queues a fill that loads the register of "right" from the spill offset of "left".
+ private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
+ {
+     Operand destRegister = GetRegister(right.Register, type);
+     Operand spillOffset = new Operand(left.SpillOffset);
+
+     Operation fillOp = new Operation(Instruction.Fill, destRegister, spillOffset);
+
+     _fillQueue.Enqueue(fillOp);
+
+     HasCopy = true;
+ }
+
+ // Queues a spill that stores the register of "left" at the spill offset of "right".
+ private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
+ {
+     Operand spillOffset = new Operand(right.SpillOffset);
+     Operand sourceRegister = GetRegister(left.Register, type);
+
+     // A spill has no destination operand, only the offset and the stored register.
+     Operation spillOp = new Operation(Instruction.Spill, null, spillOffset, sourceRegister);
+
+     _spillQueue.Enqueue(spillOp);
+
+     HasCopy = true;
+ }
+
+ // Records a move from the register of "left" into the register of "right".
+ private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
+ {
+     Register destRegister = right.Register;
+     Register sourceRegister = left.Register;
+
+     _parallelCopy.AddCopy(destRegister, sourceRegister, type);
+
+     HasCopy = true;
+ }
+
+ // Builds the final list of operations: all the spills first, then the
+ // register to register moves, and the fills last. Both queues are left
+ // empty once this returns.
+ public Operation[] Sequence()
+ {
+     List<Operation> result = new List<Operation>();
+
+     while (_spillQueue.Count != 0)
+     {
+         result.Add(_spillQueue.Dequeue());
+     }
+
+     _parallelCopy.Sequence(result);
+
+     while (_fillQueue.Count != 0)
+     {
+         result.Add(_fillQueue.Dequeue());
+     }
+
+     return result.ToArray();
+ }
+
+ // Wraps a register into an operand of the given type.
+ private static Operand GetRegister(Register reg, OperandType type)
+     => new Operand(reg.Index, reg.Type, type);
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
new file mode 100644
index 00000000..9a827420
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
@@ -0,0 +1,382 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class HybridAllocator : IRegisterAllocator
+ {
+ private const int RegistersCount = 16;
+ private const int MaxIROperands = 4;
+
+ // Immutable per block summary: a call flag, plus one value for the integer
+ // fixed registers and one for the vector fixed registers.
+ private struct BlockInfo
+ {
+     private readonly bool _hasCall;
+     private readonly int _intFixedRegisters;
+     private readonly int _vecFixedRegisters;
+
+     public bool HasCall => _hasCall;
+
+     public int IntFixedRegisters => _intFixedRegisters;
+     public int VecFixedRegisters => _vecFixedRegisters;
+
+     public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters)
+     {
+         _hasCall = hasCall;
+         _intFixedRegisters = intFixedRegisters;
+         _vecFixedRegisters = vecFixedRegisters;
+     }
+ }
+
+ // Allocation state kept for one local variable.
+ private class LocalInfo
+ {
+     // Lowest and highest block index recorded so far, -1 while none was recorded.
+     private int _first = -1;
+     private int _last = -1;
+
+     public int Uses { get; set; }
+     public int UseCount { get; set; }
+
+     public bool PreAllocated { get; set; }
+     public int Register { get; set; }
+     public int SpillOffset { get; set; }
+
+     public int Sequence { get; set; }
+
+     public Operand Temp { get; set; }
+
+     public OperandType Type { get; }
+
+     // True when the lowest and highest recorded block indices are the same,
+     // which is also the case while no index has been recorded at all.
+     public bool IsBlockLocal
+     {
+         get { return _first == _last; }
+     }
+
+     public LocalInfo(OperandType type, int uses)
+     {
+         Type = type;
+         Uses = uses;
+     }
+
+     // Widens the recorded block range so that it includes "blkIndex".
+     public void SetBlockIndex(int blkIndex)
+     {
+         if (_first == -1 || _first > blkIndex)
+         {
+             _first = blkIndex;
+         }
+
+         if (_last == -1 || _last < blkIndex)
+         {
+             _last = blkIndex;
+         }
+     }
+ }
+
+ public AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks)
+ {
+ int intUsedRegisters = 0;
+ int vecUsedRegisters = 0;
+
+ int intFreeRegisters = regMasks.IntAvailableRegisters;
+ int vecFreeRegisters = regMasks.VecAvailableRegisters;
+
+ BlockInfo[] blockInfo = new BlockInfo[cfg.Blocks.Count];
+
+ List<LocalInfo> locInfo = new List<LocalInfo>();
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ int intFixedRegisters = 0;
+ int vecFixedRegisters = 0;
+
+ bool hasCall = false;
+
+ foreach (Node node in block.Operations)
+ {
+ if (node is Operation operation && operation.Instruction == Instruction.Call)
+ {
+ hasCall = true;
+ }
+
+ for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ Operand source = node.GetSource(srcIndex);
+
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ locInfo[source.AsInt32() - 1].SetBlockIndex(block.Index);
+ }
+ }
+
+ for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
+ {
+ Operand dest = node.GetDestination(dstIndex);
+
+ if (dest.Kind == OperandKind.LocalVariable)
+ {
+ LocalInfo info;
+
+ if (dest.Value != 0)
+ {
+ info = locInfo[dest.AsInt32() - 1];
+ }
+ else
+ {
+ dest.NumberLocal(locInfo.Count + 1);
+
+ info = new LocalInfo(dest.Type, UsesCount(dest));
+
+ locInfo.Add(info);
+ }
+
+ info.SetBlockIndex(block.Index);
+ }
+ else if (dest.Kind == OperandKind.Register)
+ {
+ if (dest.Type.IsInteger())
+ {
+ intFixedRegisters |= 1 << dest.GetRegister().Index;
+ }
+ else
+ {
+ vecFixedRegisters |= 1 << dest.GetRegister().Index;
+ }
+ }
+ }
+ }
+
+ blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
+ }
+
+ int sequence = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ BlockInfo blkInfo = blockInfo[block.Index];
+
+ int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
+ int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
+
+ int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
+ int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;
+
+ int intSpillTempRegisters = SelectSpillTemps(
+ intCallerSavedRegisters & ~blkInfo.IntFixedRegisters,
+ intLocalFreeRegisters);
+ int vecSpillTempRegisters = SelectSpillTemps(
+ vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters,
+ vecLocalFreeRegisters);
+
+ intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
+ vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);
+
+ for (LinkedListNode<Node> llNode = block.Operations.First; llNode != null; llNode = llNode.Next)
+ {
+ Node node = llNode.Value;
+
+ int intLocalUse = 0;
+ int vecLocalUse = 0;
+
+ for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ Operand source = node.GetSource(srcIndex);
+
+ if (source.Kind != OperandKind.LocalVariable)
+ {
+ continue;
+ }
+
+ LocalInfo info = locInfo[source.AsInt32() - 1];
+
+ info.UseCount++;
+
+ Debug.Assert(info.UseCount <= info.Uses);
+
+ if (info.Register != -1)
+ {
+ node.SetSource(srcIndex, Register(info.Register, source.Type.ToRegisterType(), source.Type));
+
+ if (info.UseCount == info.Uses && !info.PreAllocated)
+ {
+ if (source.Type.IsInteger())
+ {
+ intLocalFreeRegisters |= 1 << info.Register;
+ }
+ else
+ {
+ vecLocalFreeRegisters |= 1 << info.Register;
+ }
+ }
+ }
+ else
+ {
+ Operand temp = info.Temp;
+
+ if (temp == null || info.Sequence != sequence)
+ {
+ temp = source.Type.IsInteger()
+ ? GetSpillTemp(source, intSpillTempRegisters, ref intLocalUse)
+ : GetSpillTemp(source, vecSpillTempRegisters, ref vecLocalUse);
+
+ info.Sequence = sequence;
+ info.Temp = temp;
+ }
+
+ node.SetSource(srcIndex, temp);
+
+ Operation fillOp = new Operation(Instruction.Fill, temp, Const(info.SpillOffset));
+
+ block.Operations.AddBefore(llNode, fillOp);
+ }
+ }
+
+ int intLocalAsg = 0;
+ int vecLocalAsg = 0;
+
+ for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
+ {
+ Operand dest = node.GetDestination(dstIndex);
+
+ if (dest.Kind != OperandKind.LocalVariable)
+ {
+ continue;
+ }
+
+ LocalInfo info = locInfo[dest.AsInt32() - 1];
+
+ if (info.UseCount == 0 && !info.PreAllocated)
+ {
+ int mask = dest.Type.IsInteger()
+ ? intLocalFreeRegisters
+ : vecLocalFreeRegisters;
+
+ if (info.IsBlockLocal && mask != 0)
+ {
+ int selectedReg = BitUtils.LowestBitSet(mask);
+
+ info.Register = selectedReg;
+
+ if (dest.Type.IsInteger())
+ {
+ intLocalFreeRegisters &= ~(1 << selectedReg);
+ intUsedRegisters |= 1 << selectedReg;
+ }
+ else
+ {
+ vecLocalFreeRegisters &= ~(1 << selectedReg);
+ vecUsedRegisters |= 1 << selectedReg;
+ }
+ }
+ else
+ {
+ info.Register = -1;
+ info.SpillOffset = stackAlloc.Allocate(dest.Type.GetSizeInBytes());
+ }
+ }
+
+ info.UseCount++;
+
+ Debug.Assert(info.UseCount <= info.Uses);
+
+ if (info.Register != -1)
+ {
+ node.SetDestination(dstIndex, Register(info.Register, dest.Type.ToRegisterType(), dest.Type));
+ }
+ else
+ {
+ Operand temp = info.Temp;
+
+ if (temp == null || info.Sequence != sequence)
+ {
+ temp = dest.Type.IsInteger()
+ ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
+ : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);
+
+ info.Sequence = sequence;
+ info.Temp = temp;
+ }
+
+ node.SetDestination(dstIndex, temp);
+
+ Operation spillOp = new Operation(Instruction.Spill, null, Const(info.SpillOffset), temp);
+
+ llNode = block.Operations.AddAfter(llNode, spillOp);
+ }
+ }
+
+ sequence++;
+
+ intUsedRegisters |= intLocalAsg | intLocalUse;
+ vecUsedRegisters |= vecLocalAsg | vecLocalUse;
+ }
+ }
+
+ return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
+ }
+
+ // Selects up to MaxIROperands registers to be used as spill temporaries.
+ // Registers on mask0 are taken first, starting from the lowest set bit; mask1 is
+ // only consulted when mask0 doesn't have enough registers.
+ // Returns a bit mask with one bit set for each selected register.
+ private static int SelectSpillTemps(int mask0, int mask1)
+ {
+ int selection = 0;
+ int count = 0;
+
+ // Moves the lowest set bits of the mask into the selection, until the mask
+ // is exhausted or enough registers were selected.
+ void TakeLowestBits(int mask)
+ {
+ while (count < MaxIROperands && mask != 0)
+ {
+ int lowestBit = mask & -mask;
+
+ selection |= lowestBit;
+
+ mask &= ~lowestBit;
+
+ count++;
+ }
+ }
+
+ TakeLowestBits(mask0);
+ TakeLowestBits(mask1);
+
+ Debug.Assert(count == MaxIROperands, "Not enough registers for spill temps.");
+
+ return selection;
+ }
+
+ // Picks a spill temporary register for the local, among the registers set on freeMask
+ // that are not yet marked on useMask. The register is the one BitUtils.LowestBitSet
+ // returns for those candidates; it is marked on useMask before returning, so the
+ // next call with the same masks picks a different register.
+ private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
+ {
+ int candidates = freeMask & ~useMask;
+
+ int regIndex = BitUtils.LowestBitSet(candidates);
+
+ useMask |= 1 << regIndex;
+
+ return Register(regIndex, local.Type.ToRegisterType(), local.Type);
+ }
+
+ // Total amount of references to the local: its assignments plus its uses.
+ private static int UsesCount(Operand local)
+ {
+ int assignments = local.Assignments.Count;
+ int uses = local.Uses.Count;
+
+ return assignments + uses;
+ }
+
+ // Lazily enumerates the successors of the block: the Next block first,
+ // then the Branch block. Null successors are skipped.
+ private static IEnumerable<BasicBlock> Successors(BasicBlock block)
+ {
+ BasicBlock next = block.Next;
+
+ if (next != null)
+ {
+ yield return next;
+ }
+
+ // Branch is only read after the first successor was consumed.
+ BasicBlock branch = block.Branch;
+
+ if (branch != null)
+ {
+ yield return branch;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
new file mode 100644
index 00000000..8f236c25
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
@@ -0,0 +1,12 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Common entry point of the register allocators.
+ interface IRegisterAllocator
+ {
+ // Runs the register allocation pass on the control flow graph.
+ // The stack allocator and the register masks are supplied by the caller;
+ // see the implementations for how each of them is used.
+ AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks);
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
new file mode 100644
index 00000000..6d5ecc14
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -0,0 +1,1019 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Based on:
+ // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
+ // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
+ class LinearScanAllocator : IRegisterAllocator
+ {
+ // Amount of positions reserved for each operation on the linear numbering.
+ // GetOperationNode divides a position by this value to find the operation.
+ private const int InstructionGap = 2;
+ // Mask of the position bits below the gap; "position & ~mask" aligns a position
+ // down to a multiple of InstructionGap.
+ private const int InstructionGapMask = InstructionGap - 1;
+
+ // Amount of registers tracked for each register type (integer and vector).
+ private const int RegistersCount = 16;
+
+ // Start position of every block, filled by BuildIntervals.
+ private HashSet<int> _blockEdges;
+
+ // Position range of every block, indexed by block index.
+ private LiveRange[] _blockRanges;
+
+ // Live-in set of every block, indexed by block index.
+ private BitMap[] _blockLiveIn;
+
+ // All the intervals. The first RegistersCount * 2 entries are the fixed register
+ // intervals, followed by the intervals of the locals and the split children
+ // that are inserted during allocation.
+ private List<LiveInterval> _intervals;
+
+ // Copy of the intervals list taken by NumberLocals, before any split child is inserted.
+ private LiveInterval[] _parentIntervals;
+
+ // Linked list node of every numbered operation, in numbering order.
+ // Holds a null entry for the dummy operation of an empty block.
+ private List<LinkedListNode<Node>> _operationNodes;
+
+ // Total amount of positions on the linear numbering.
+ private int _operationsCount;
+
+ // State shared by the steps of a single allocation pass.
+ private class AllocationContext
+ {
+ public RegisterMasks Masks { get; }
+
+ public StackAllocator StackAlloc { get; }
+
+ // Sets of interval indices (indices on the intervals list).
+ public BitMap Active { get; }
+ public BitMap Inactive { get; }
+
+ // Masks of the registers used so far, one bit per register index.
+ public int IntUsedRegisters { get; set; }
+ public int VecUsedRegisters { get; set; }
+
+ public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount)
+ {
+ StackAlloc = stackAlloc;
+ Masks = masks;
+
+ Active = new BitMap(intervalsCount);
+ Inactive = new BitMap(intervalsCount);
+ }
+
+ // Removes the bit from the active set and adds it to the inactive set.
+ public void MoveActiveToInactive(int bit)
+ {
+ Active.Clear(bit);
+
+ Inactive.Set(bit);
+ }
+
+ // Removes the bit from the inactive set and adds it to the active set.
+ public void MoveInactiveToActive(int bit)
+ {
+ Inactive.Clear(bit);
+
+ Active.Set(bit);
+ }
+ }
+
+ // Runs the linear scan allocation on the control flow graph.
+ // The locals are numbered, the live intervals are built, every interval is then
+ // visited in list order to get a register (or be spilled), and lastly the operands
+ // are replaced with registers and the copies required by split intervals are inserted.
+ // Returns the masks of integer and vector registers used, and the total stack size.
+ public AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks)
+ {
+ NumberLocals(cfg);
+
+ AllocationContext context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
+
+ BuildIntervals(cfg, context);
+
+ // The list may grow while this loop runs, as the allocation inserts
+ // split children on it (see InsertInterval).
+ for (int index = 0; index < _intervals.Count; index++)
+ {
+ LiveInterval current = _intervals[index];
+
+ if (current.IsEmpty)
+ {
+ continue;
+ }
+
+ // Fixed intervals already have a register, they only need to be
+ // made active and have their register marked as used.
+ if (current.IsFixed)
+ {
+ context.Active.Set(index);
+
+ if (current.Register.Type == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << current.Register.Index;
+ }
+ else /* if (current.Register.Type == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << current.Register.Index;
+ }
+
+ continue;
+ }
+
+ AllocateInterval(context, current, index);
+ }
+
+ // Skip the fixed intervals at the start of the list, and replace the
+ // local with the assigned register on all intervals that were not spilled.
+ for (int index = RegistersCount * 2; index < _intervals.Count; index++)
+ {
+ if (!_intervals[index].IsSpilled)
+ {
+ ReplaceLocalWithRegister(_intervals[index]);
+ }
+ }
+
+ InsertSplitCopies();
+ InsertSplitCopiesAtEdges(cfg);
+
+ return new AllocationResult(
+ context.IntUsedRegisters,
+ context.VecUsedRegisters,
+ context.StackAlloc.TotalSize);
+ }
+
+ // Allocates a register (or stack space) for the current interval.
+ // The active and inactive sets are first updated for the start position of the
+ // current interval, then a register is requested without spilling, falling back
+ // to the allocation with spill when that fails.
+ // cIndex is the index of the current interval on the intervals list.
+ private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
+ {
+ // Check active intervals that already ended.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.GetEnd() < current.GetStart())
+ {
+ context.Active.Clear(iIndex);
+ }
+ else if (!interval.Overlaps(current.GetStart()))
+ {
+ // Still alive later on, but not at the current start position.
+ context.MoveActiveToInactive(iIndex);
+ }
+ }
+
+ // Check inactive intervals that already ended or were reactivated.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.GetEnd() < current.GetStart())
+ {
+ context.Inactive.Clear(iIndex);
+ }
+ else if (interval.Overlaps(current.GetStart()))
+ {
+ context.MoveInactiveToActive(iIndex);
+ }
+ }
+
+ if (!TryAllocateRegWithoutSpill(context, current, cIndex))
+ {
+ AllocateRegWithSpill(context, current, cIndex);
+ }
+ }
+
+ // Tries to assign a register to the current interval without spilling any other interval.
+ // For each register, the position until which it stays free is computed; the register
+ // that stays free the longest is selected. When it is not free for the entire interval,
+ // the interval is split at the position where the register stops being free.
+ // Returns true when a register was assigned (the interval is then made active),
+ // false when no register is free at the start of the interval.
+ private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ int availableRegisters = context.Masks.GetAvailableRegisters(regType);
+
+ // Registers that are not available keep the default free position of 0.
+ int[] freePositions = new int[RegistersCount];
+
+ for (int index = 0; index < RegistersCount; index++)
+ {
+ if ((availableRegisters & (1 << index)) != 0)
+ {
+ freePositions[index] = int.MaxValue;
+ }
+ }
+
+ // Registers of active intervals are not free at all.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.Register.Type == regType)
+ {
+ freePositions[interval.Register.Index] = 0;
+ }
+ }
+
+ // Registers of inactive intervals are free until the position where
+ // the inactive interval overlaps the current one, if there is any.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.Register.Type == regType)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound && freePositions[interval.Register.Index] > overlapPosition)
+ {
+ freePositions[interval.Register.Index] = overlapPosition;
+ }
+ }
+ }
+
+ int selectedReg = GetHighestValueIndex(freePositions);
+
+ int selectedNextUse = freePositions[selectedReg];
+
+ // Intervals start and end at odd positions, unless they span an entire
+ // block, in which case they will have ranges at an even position.
+ // When an interval is loaded from the stack to a register, we can only
+ // do the split at an odd position, because otherwise the split interval
+ // that is inserted on the list to be processed may clobber a register
+ // used by the instruction at the same position as the split.
+ // The problem only happens when an interval ends exactly at this instruction,
+ // because otherwise they would interfere, and the register wouldn't be selected.
+ // When the interval is aligned and the above happens, there's no problem as
+ // the instruction that actually has the last use is the one
+ // before that position.
+ selectedNextUse &= ~InstructionGapMask;
+
+ if (selectedNextUse <= current.GetStart())
+ {
+ // The best register is already taken at the start of the interval.
+ return false;
+ }
+ else if (selectedNextUse < current.GetEnd())
+ {
+ Debug.Assert(selectedNextUse > current.GetStart(), "Trying to split interval at the start.");
+
+ LiveInterval splitChild = current.Split(selectedNextUse);
+
+ // A split child with uses is queued to get its own register later,
+ // one without uses can just live on the stack.
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+ }
+
+ current.Register = new Register(selectedReg, regType);
+
+ if (regType == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << selectedReg;
+ }
+ else /* if (regType == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << selectedReg;
+ }
+
+ context.Active.Set(cIndex);
+
+ return true;
+ }
+
+ // Allocates the current interval when no register is free at its start position.
+ // For each register, two positions are computed: the next use of the (non fixed)
+ // intervals holding it, and the position where it is blocked by a fixed interval.
+ // The register with the farthest next use is selected. Then, either the current
+ // interval is spilled until its first use, or the intervals holding the selected
+ // register are split and spilled to make room for the current interval.
+ private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ int availableRegisters = context.Masks.GetAvailableRegisters(regType);
+
+ // Registers that are not available keep the default position of 0 on both arrays.
+ int[] usePositions = new int[RegistersCount];
+ int[] blockedPositions = new int[RegistersCount];
+
+ for (int index = 0; index < RegistersCount; index++)
+ {
+ if ((availableRegisters & (1 << index)) != 0)
+ {
+ usePositions[index] = int.MaxValue;
+
+ blockedPositions[index] = int.MaxValue;
+ }
+ }
+
+ // Both helpers only ever lower the stored position.
+ void SetUsePosition(int index, int position)
+ {
+ usePositions[index] = Math.Min(usePositions[index], position);
+ }
+
+ // A blocked register is also considered to be used at the blocked position.
+ void SetBlockedPosition(int index, int position)
+ {
+ blockedPositions[index] = Math.Min(blockedPositions[index], position);
+
+ SetUsePosition(index, position);
+ }
+
+ // Next use of the non fixed active intervals.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register.Type == regType)
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != -1)
+ {
+ SetUsePosition(interval.Register.Index, nextUse);
+ }
+ }
+ }
+
+ // Next use of the non fixed inactive intervals that overlap the current one.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current))
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != -1)
+ {
+ SetUsePosition(interval.Register.Index, nextUse);
+ }
+ }
+ }
+
+ // Registers of active fixed intervals are blocked from the very beginning.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.IsFixed && interval.Register.Type == regType)
+ {
+ SetBlockedPosition(interval.Register.Index, 0);
+ }
+ }
+
+ // Registers of inactive fixed intervals are blocked from the position
+ // where they overlap the current interval, if there is any.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.IsFixed && interval.Register.Type == regType)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound)
+ {
+ SetBlockedPosition(interval.Register.Index, overlapPosition);
+ }
+ }
+ }
+
+ int selectedReg = GetHighestValueIndex(usePositions);
+
+ int currentFirstUse = current.FirstUse();
+
+ Debug.Assert(currentFirstUse >= 0, "Current interval has no uses.");
+
+ if (usePositions[selectedReg] < currentFirstUse)
+ {
+ // All intervals on inactive and active are being used before current,
+ // so spill the current interval.
+ Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill a interval currently being used.");
+
+ LiveInterval splitChild = current.Split(currentFirstUse);
+
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ // The part starting at the first use is queued to get a register later.
+ InsertInterval(splitChild);
+
+ Spill(context, current);
+ }
+ else if (blockedPositions[selectedReg] > current.GetEnd())
+ {
+ // Spill made the register available for the entire current lifetime,
+ // so we only need to split the intervals using the selected register.
+ current.Register = new Register(selectedReg, regType);
+
+ SplitAndSpillOverlappingIntervals(context, current);
+
+ context.Active.Set(cIndex);
+ }
+ else
+ {
+ // There are conflicts even after spill due to the use of fixed registers
+ // that can't be spilled, so we need to also split current at the point of
+ // the first fixed register use.
+ current.Register = new Register(selectedReg, regType);
+
+ // Align the split position down to a multiple of the instruction gap.
+ int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask;
+
+ Debug.Assert(splitPosition > current.GetStart(), "Trying to split a interval at a invalid position.");
+
+ LiveInterval splitChild = current.Split(splitPosition);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+
+ SplitAndSpillOverlappingIntervals(context, current);
+
+ context.Active.Set(cIndex);
+ }
+ }
+
+ // Returns the index of the first element holding the highest value of the array.
+ private static int GetHighestValueIndex(int[] array)
+ {
+ int bestIndex = 0;
+ int bestValue = array[0];
+
+ // No value can be higher than int.MaxValue, so the search stops once it is found.
+ for (int index = 1; index < array.Length && bestValue != int.MaxValue; index++)
+ {
+ if (array[index] > bestValue)
+ {
+ bestValue = array[index];
+ bestIndex = index;
+ }
+ }
+
+ return bestIndex;
+ }
+
+ // Takes the register of the current interval away from every non fixed interval
+ // that holds it: all the active ones, and the inactive ones that overlap current.
+ // Those intervals are split and spilled, and removed from their set.
+ private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current)
+ {
+ foreach (int activeIndex in context.Active)
+ {
+ LiveInterval candidate = _intervals[activeIndex];
+
+ if (candidate.IsFixed || !(candidate.Register == current.Register))
+ {
+ continue;
+ }
+
+ SplitAndSpillOverlappingInterval(context, current, candidate);
+
+ context.Active.Clear(activeIndex);
+ }
+
+ foreach (int inactiveIndex in context.Inactive)
+ {
+ LiveInterval candidate = _intervals[inactiveIndex];
+
+ if (candidate.IsFixed || !(candidate.Register == current.Register) || !candidate.Overlaps(current))
+ {
+ continue;
+ }
+
+ SplitAndSpillOverlappingInterval(context, current, candidate);
+
+ context.Inactive.Clear(inactiveIndex);
+ }
+ }
+
+ // Splits the interval at the start of the current interval, and spills the part
+ // that has no uses. When the interval is used again after that position, the part
+ // starting at that use is queued to get a new register.
+ private void SplitAndSpillOverlappingInterval(
+ AllocationContext context,
+ LiveInterval current,
+ LiveInterval interval)
+ {
+ // If there's a next use after the start of the current interval,
+ // we need to split the spilled interval twice, and re-insert it
+ // on the "pending" list to ensure that it will get a new register
+ // on that use position.
+ // Note that the next use is read before the interval is split below.
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ LiveInterval splitChild;
+
+ if (interval.GetStart() < current.GetStart())
+ {
+ splitChild = interval.Split(current.GetStart());
+ }
+ else
+ {
+ // The interval doesn't start before current, so there's nothing to keep
+ // on the register, and the whole interval is handled as the split child.
+ splitChild = interval;
+ }
+
+ if (nextUse != -1)
+ {
+ Debug.Assert(nextUse > current.GetStart(), "Trying to spill a interval currently being used.");
+
+ if (nextUse > splitChild.GetStart())
+ {
+ // Spill the part before the next use, and queue the rest.
+ LiveInterval right = splitChild.Split(nextUse);
+
+ Spill(context, splitChild);
+
+ splitChild = right;
+ }
+
+ InsertInterval(splitChild);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+ }
+
+ // Queues an interval for allocation, by inserting it on the intervals list at the
+ // position found by a binary search over the non fixed part of the list (using the
+ // default comparer).
+ private void InsertInterval(LiveInterval interval)
+ {
+ Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses.");
+ Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval.");
+ Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");
+
+ // The fixed register intervals at the start of the list are left out of the search.
+ int firstLocalIndex = RegistersCount * 2;
+ int searchCount = _intervals.Count - firstLocalIndex;
+
+ int position = _intervals.BinarySearch(firstLocalIndex, searchCount, interval, null);
+
+ // A negative result is the bitwise complement of the insertion point.
+ _intervals.Insert(position < 0 ? ~position : position, interval);
+ }
+
+ // Assigns a stack location to an interval that has no uses.
+ private void Spill(AllocationContext context, LiveInterval interval)
+ {
+ Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval.");
+ Debug.Assert(interval.UsesCount == 0, "Trying to spill a interval with uses.");
+
+ // Reusing the stack offset of a sibling that was already spilled prevents
+ // stack-to-stack copies being necessary for a split interval.
+ if (interval.TrySpillWithSiblingOffset())
+ {
+ return;
+ }
+
+ // No sibling was spilled yet, so new space is allocated on the stack.
+ int spillOffset = context.StackAlloc.Allocate(interval.Local.Type);
+
+ interval.Spill(spillOffset);
+ }
+
+ // Inserts the copies between consecutive pieces of split intervals, for the splits
+ // that are not located at the start of a block (those are handled on the block edges).
+ // The copies of all splits at the same position are resolved together, and the
+ // resulting operations are inserted before the operation at that position.
+ private void InsertSplitCopies()
+ {
+ Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>();
+
+ // Returns the resolver of the position, creating it only on the first request.
+ CopyResolver GetCopyResolver(int position)
+ {
+ if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
+ {
+ copyResolver = new CopyResolver();
+
+ copyResolvers.Add(position, copyResolver);
+ }
+
+ return copyResolver;
+ }
+
+ foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
+ {
+ LiveInterval previous = interval;
+
+ foreach (LiveInterval splitChild in interval.SplitChilds())
+ {
+ int splitPosition = splitChild.GetStart();
+
+ // A copy is only needed when the child starts exactly where the previous piece ends.
+ if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
+ {
+ GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
+ }
+
+ previous = splitChild;
+ }
+ }
+
+ foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
+ {
+ CopyResolver copyResolver = kv.Value;
+
+ if (!copyResolver.HasCopy)
+ {
+ continue;
+ }
+
+ int splitPosition = kv.Key;
+
+ LinkedListNode<Node> node = GetOperationNode(splitPosition);
+
+ Operation[] sequence = copyResolver.Sequence();
+
+ // The first operation goes before the node at the split position,
+ // the remaining ones are chained after it to keep the sequence order.
+ node = node.List.AddBefore(node, sequence[0]);
+
+ for (int index = 1; index < sequence.Length; index++)
+ {
+ node = node.List.AddAfter(node, sequence[index]);
+ }
+ }
+ }
+
+ // Inserts the copies needed on the control flow edges, for the split intervals that
+ // are at a different location at the end of a block and at the start of its successor.
+ // The copies are placed at the end of the block when it has at most one successor,
+ // at the start of the successor when the block is its only predecessor, and on a
+ // new block splitting the edge otherwise.
+ private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg)
+ {
+ // Blocks created below by splitting edges get an index that is not
+ // lower than the blocks count at this point.
+ int blocksCount = cfg.Blocks.Count;
+
+ bool IsSplitEdgeBlock(BasicBlock block)
+ {
+ return block.Index >= blocksCount;
+ }
+
+ for (LinkedListNode<BasicBlock> node = cfg.Blocks.First; node != null; node = node.Next)
+ {
+ BasicBlock block = node.Value;
+
+ if (IsSplitEdgeBlock(block))
+ {
+ continue;
+ }
+
+ bool hasSingleOrNoSuccessor = block.Next == null || block.Branch == null;
+
+ foreach (BasicBlock successor in Successors(block))
+ {
+ int succIndex = successor.Index;
+
+ // If the current node is a split node, then the actual successor node
+ // (the successor before the split) should be right after it.
+ if (IsSplitEdgeBlock(successor))
+ {
+ succIndex = Successors(successor).First().Index;
+ }
+
+ CopyResolver copyResolver = new CopyResolver();
+
+ foreach (int iIndex in _blockLiveIn[succIndex])
+ {
+ LiveInterval interval = _parentIntervals[iIndex];
+
+ if (!interval.IsSplit)
+ {
+ continue;
+ }
+
+ // Last position of the block, and first position of the successor.
+ int lEnd = _blockRanges[block.Index].End - 1;
+ int rStart = _blockRanges[succIndex].Start;
+
+ LiveInterval left = interval.GetSplitChild(lEnd);
+ LiveInterval right = interval.GetSplitChild(rStart);
+
+ // A copy is needed when the two positions belong to different pieces of the interval.
+ if (left != null && right != null && left != right)
+ {
+ copyResolver.AddSplit(left, right);
+ }
+ }
+
+ if (!copyResolver.HasCopy)
+ {
+ continue;
+ }
+
+ Operation[] sequence = copyResolver.Sequence();
+
+ if (hasSingleOrNoSuccessor)
+ {
+ foreach (Operation operation in sequence)
+ {
+ block.Append(operation);
+ }
+ }
+ else if (successor.Predecessors.Count == 1)
+ {
+ // Prepend the whole sequence to the successor, keeping its order.
+ LinkedListNode<Node> prependNode = successor.Operations.AddFirst(sequence[0]);
+
+ for (int index = 1; index < sequence.Length; index++)
+ {
+ Operation operation = sequence[index];
+
+ prependNode = successor.Operations.AddAfter(prependNode, operation);
+ }
+ }
+ else
+ {
+ // Split the critical edge.
+ BasicBlock splitBlock = cfg.SplitEdge(block, successor);
+
+ foreach (Operation operation in sequence)
+ {
+ splitBlock.Append(operation);
+ }
+ }
+ }
+ }
+ }
+
+ // Replaces the local of the interval with the register assigned to the interval,
+ // on the sources and destinations of all the operations at its use positions.
+ private void ReplaceLocalWithRegister(LiveInterval current)
+ {
+ Operand replacement = GetRegister(current);
+
+ foreach (int position in current.UsePositions())
+ {
+ Node target = GetOperationNode(position).Value;
+
+ for (int srcIndex = 0; srcIndex < target.SourcesCount; srcIndex++)
+ {
+ if (target.GetSource(srcIndex) == current.Local)
+ {
+ target.SetSource(srcIndex, replacement);
+ }
+ }
+
+ for (int dstIndex = 0; dstIndex < target.DestinationsCount; dstIndex++)
+ {
+ if (target.GetDestination(dstIndex) == current.Local)
+ {
+ target.SetDestination(dstIndex, replacement);
+ }
+ }
+ }
+ }
+
+ // Creates a register operand for the register assigned to the interval,
+ // with the same type as the local of the interval.
+ private static Operand GetRegister(LiveInterval interval)
+ {
+ Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");
+
+ Register assigned = interval.Register;
+
+ return new Operand(assigned.Index, assigned.Type, interval.Local.Type);
+ }
+
+ // Returns the linked list node of the operation located at the position.
+ private LinkedListNode<Node> GetOperationNode(int position)
+ {
+ // Every operation takes InstructionGap positions on the numbering.
+ int nodeIndex = position / InstructionGap;
+
+ return _operationNodes[nodeIndex];
+ }
+
+ // Creates the fixed intervals of all registers, then visits the blocks in reverse
+ // post order to record the node of every operation, and to give a number and an
+ // interval to each local the first time it is found as a destination.
+ private void NumberLocals(ControlFlowGraph cfg)
+ {
+ _operationNodes = new List<LinkedListNode<Node>>();
+
+ _intervals = new List<LiveInterval>();
+
+ // The fixed intervals come first, interleaving integer and vector registers.
+ for (int regIndex = 0; regIndex < RegistersCount; regIndex++)
+ {
+ _intervals.Add(new LiveInterval(new Register(regIndex, RegisterType.Integer)));
+ _intervals.Add(new LiveInterval(new Register(regIndex, RegisterType.Vector)));
+ }
+
+ HashSet<Operand> numbered = new HashSet<Operand>();
+
+ _operationsCount = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ LinkedListNode<Node> node = block.Operations.First;
+
+ while (node != null)
+ {
+ _operationNodes.Add(node);
+
+ Node operation = node.Value;
+
+ for (int dstIndex = 0; dstIndex < operation.DestinationsCount; dstIndex++)
+ {
+ Operand dest = operation.GetDestination(dstIndex);
+
+ if (dest.Kind != OperandKind.LocalVariable || !numbered.Add(dest))
+ {
+ continue;
+ }
+
+ // The number of the local is the index of its interval on the list.
+ dest.NumberLocal(_intervals.Count);
+
+ _intervals.Add(new LiveInterval(dest));
+ }
+
+ node = node.Next;
+ }
+
+ int slots = block.Operations.Count;
+
+ if (slots == 0)
+ {
+ // Pretend we have a dummy instruction on the empty block.
+ _operationNodes.Add(null);
+
+ slots = 1;
+ }
+
+ _operationsCount += slots * InstructionGap;
+ }
+
+ _parentIntervals = _intervals.ToArray();
+ }
+
+ // Builds the live intervals of all locals and registers.
+ // The gen/kill sets of each block are computed first, then the live-in and live-out
+ // sets are propagated until nothing changes, and lastly the blocks are walked
+ // backwards to add the live ranges and use positions to the intervals.
+ // Also fills the block ranges, the block edges and the block live-in sets.
+ private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context)
+ {
+ _blockRanges = new LiveRange[cfg.Blocks.Count];
+
+ int mapSize = _intervals.Count;
+
+ BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count];
+ BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count];
+
+ // Compute local live sets.
+ foreach (BasicBlock block in cfg.Blocks)
+ {
+ BitMap liveGen = new BitMap(mapSize);
+ BitMap liveKill = new BitMap(mapSize);
+
+ foreach (Node node in block.Operations)
+ {
+ // A source that was not assigned earlier on the block is live on block entry.
+ foreach (Operand source in Sources(node))
+ {
+ int id = GetOperandId(source);
+
+ if (!liveKill.IsSet(id))
+ {
+ liveGen.Set(id);
+ }
+ }
+
+ foreach (Operand dest in Destinations(node))
+ {
+ liveKill.Set(GetOperandId(dest));
+ }
+ }
+
+ blkLiveGen [block.Index] = liveGen;
+ blkLiveKill[block.Index] = liveKill;
+ }
+
+ // Compute global live sets.
+ BitMap[] blkLiveIn = new BitMap[cfg.Blocks.Count];
+ BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count];
+
+ for (int index = 0; index < cfg.Blocks.Count; index++)
+ {
+ blkLiveIn [index] = new BitMap(mapSize);
+ blkLiveOut[index] = new BitMap(mapSize);
+ }
+
+ bool modified;
+
+ // Iterate until the live-out sets stop changing.
+ do
+ {
+ modified = false;
+
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ BitMap liveOut = blkLiveOut[block.Index];
+
+ // The live-out set is the union of the live-in sets of all successors.
+ foreach (BasicBlock successor in Successors(block))
+ {
+ if (liveOut.Set(blkLiveIn[successor.Index]))
+ {
+ modified = true;
+ }
+ }
+
+ BitMap liveIn = blkLiveIn[block.Index];
+
+ // The live-in set is the live-out set, minus the kill set, plus the gen set.
+ liveIn.Set (liveOut);
+ liveIn.Clear(blkLiveKill[block.Index]);
+ liveIn.Set (blkLiveGen [block.Index]);
+ }
+ }
+ while (modified);
+
+ _blockLiveIn = blkLiveIn;
+
+ _blockEdges = new HashSet<int>();
+
+ // Compute lifetime intervals.
+ // The blocks are visited in post order with the position counting down from the
+ // total, which matches the numbering done in reverse post order by NumberLocals.
+ int operationPos = _operationsCount;
+
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ // We handle empty blocks by pretending they have a dummy instruction,
+ // because otherwise the block would have the same start and end position,
+ // and this is not valid.
+ int instCount = Math.Max(block.Operations.Count, 1);
+
+ int blockStart = operationPos - instCount * InstructionGap;
+ int blockEnd = operationPos;
+
+ _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd);
+
+ _blockEdges.Add(blockStart);
+
+ BitMap liveOut = blkLiveOut[block.Index];
+
+ // Everything that is live on block exit starts as live for the whole block,
+ // the start is adjusted below if a definition is found on the block.
+ foreach (int id in liveOut)
+ {
+ _intervals[id].AddRange(blockStart, blockEnd);
+ }
+
+ if (block.Operations.Count == 0)
+ {
+ operationPos -= InstructionGap;
+
+ continue;
+ }
+
+ // NOTE(review): BottomOperations stops at the first phi node, and the position
+ // is only decremented for the nodes it returns - presumably no phi nodes are
+ // left on the blocks at this point; confirm against the callers.
+ foreach (Node node in BottomOperations(block))
+ {
+ operationPos -= InstructionGap;
+
+ // Destinations are defined at the odd position of the operation.
+ foreach (Operand dest in Destinations(node))
+ {
+ LiveInterval interval = _intervals[GetOperandId(dest)];
+
+ interval.SetStart(operationPos + 1);
+ interval.AddUsePosition(operationPos + 1);
+ }
+
+ // Sources are used at the even position of the operation, and are
+ // live from the start of the block up to the operation.
+ foreach (Operand source in Sources(node))
+ {
+ LiveInterval interval = _intervals[GetOperandId(source)];
+
+ interval.AddRange(blockStart, operationPos + 1);
+ interval.AddUsePosition(operationPos);
+ }
+
+ // Calls add a range to the fixed intervals of all caller saved registers.
+ if (node is Operation operation && operation.Instruction == Instruction.Call)
+ {
+ AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
+ AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
+ }
+ }
+ }
+ }
+
+ // Adds the range from operationPos + 1 to operationPos + InstructionGap to the fixed
+ // interval of every register of the given type that is set on the mask.
+ private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
+ {
+ int rangeStart = operationPos + 1;
+ int rangeEnd = operationPos + InstructionGap;
+
+ while (mask != 0)
+ {
+ int regIndex = BitUtils.LowestBitSet(mask);
+
+ // The register is done, remove it from the pending mask.
+ mask &= ~(1 << regIndex);
+
+ int intervalId = GetRegisterId(new Register(regIndex, regType));
+
+ _intervals[intervalId].AddRange(rangeStart, rangeEnd);
+ }
+ }
+
+ // Returns the interval id of a local variable or register operand.
+ // Throws ArgumentException for any other kind of operand.
+ private static int GetOperandId(Operand operand)
+ {
+ switch (operand.Kind)
+ {
+ case OperandKind.LocalVariable:
+ // Locals hold the number they were given when they were numbered.
+ return operand.AsInt32();
+
+ case OperandKind.Register:
+ return GetRegisterId(operand.GetRegister());
+
+ default:
+ throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
+ }
+ }
+
+ // Returns the interval id of a register: the register index on the upper bits,
+ // and the lowest bit set only for vector registers.
+ private static int GetRegisterId(Register register)
+ {
+ int typeBit = register.Type == RegisterType.Vector ? 1 : 0;
+
+ return (register.Index << 1) | typeBit;
+ }
+
+ // Lazily enumerates the successors of the block: the Next block first,
+ // then the Branch block. Null successors are skipped.
+ private static IEnumerable<BasicBlock> Successors(BasicBlock block)
+ {
+ BasicBlock next = block.Next;
+
+ if (next != null)
+ {
+ yield return next;
+ }
+
+ // Branch is only read after the first successor was consumed.
+ BasicBlock branch = block.Branch;
+
+ if (branch != null)
+ {
+ yield return branch;
+ }
+ }
+
+ // Enumerates the operations of the block from the last to the first one,
+ // stopping as soon as a phi node is found.
+ private static IEnumerable<Node> BottomOperations(BasicBlock block)
+ {
+ for (LinkedListNode<Node> current = block.Operations.Last; current != null; current = current.Previous)
+ {
+ if (current.Value is PhiNode)
+ {
+ yield break;
+ }
+
+ yield return current.Value;
+ }
+ }
+
+ // Enumerates all the destination operands of the node, in index order.
+ private static IEnumerable<Operand> Destinations(Node node)
+ {
+ int dstIndex = 0;
+
+ while (dstIndex < node.DestinationsCount)
+ {
+ yield return node.GetDestination(dstIndex);
+
+ dstIndex++;
+ }
+ }
+
+ // Enumerates the source operands of the node that are local variables or registers,
+ // in index order. Sources of any other kind are skipped.
+ private static IEnumerable<Operand> Sources(Node node)
+ {
+ for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ Operand candidate = node.GetSource(srcIndex);
+
+ if (!IsLocalOrRegister(candidate.Kind))
+ {
+ continue;
+ }
+
+ yield return candidate;
+ }
+ }
+
+ // Tells whether the operand kind is a local variable or a register.
+ private static bool IsLocalOrRegister(OperandKind kind)
+ {
+ switch (kind)
+ {
+ case OperandKind.LocalVariable:
+ case OperandKind.Register:
+ return true;
+
+ default:
+ return false;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
new file mode 100644
index 00000000..18858a76
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
@@ -0,0 +1,390 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class LiveInterval : IComparable<LiveInterval>
+ {
+ // Sentinel returned by the position queries of this class when there is no match.
+ public const int NotFound = -1;
+
+ // Interval whose _childs list receives split children. The constructor
+ // points it at this instance when no parent is supplied.
+ private LiveInterval _parent;
+
+ // Use positions recorded through AddUsePosition, kept in ascending order.
+ private SortedSet<int> _usePositions;
+
+ public int UsesCount => _usePositions.Count;
+
+ // Live ranges of this interval. The list is searched with BinarySearch, so
+ // it is expected to stay sorted by start position.
+ private List<LiveRange> _ranges;
+
+ // Split children, keyed by the start position they had when they were added.
+ private SortedList<int, LiveInterval> _childs;
+
+ // True when at least one split child is registered on this instance.
+ public bool IsSplit => _childs.Count != 0;
+
+ // Operand passed to the constructor (null when none was given).
+ public Operand Local { get; }
+
+ public Register Register { get; set; }
+
+ // Offset set by Spill; -1 (the constructor default) means "not spilled".
+ public int SpillOffset { get; private set; }
+
+ public bool IsSpilled => SpillOffset != -1;
+ // Only set (to true) by the constructor that takes a Register.
+ public bool IsFixed { get; }
+
+ // True while the interval has no live range at all.
+ public bool IsEmpty => _ranges.Count == 0;
+
+ // Creates an empty interval that is not spilled (SpillOffset of -1).
+ // When no parent is supplied, the interval acts as its own parent.
+ public LiveInterval(Operand local = null, LiveInterval parent = null)
+ {
+     Local       = local;
+     SpillOffset = -1;
+
+     _parent = parent != null ? parent : this;
+
+     _ranges       = new List<LiveRange>();
+     _usePositions = new SortedSet<int>();
+     _childs       = new SortedList<int, LiveInterval>();
+ }
+
+ // Creates an empty interval bound to the given register and flags it as fixed.
+ public LiveInterval(Register register) : this()
+ {
+     Register = register;
+     IsFixed  = true;
+ }
+
+ // Moves the start of the first range to the given position. An interval
+ // without ranges gets a single one-position range [position, position + 1[.
+ public void SetStart(int position)
+ {
+     if (_ranges.Count == 0)
+     {
+         _ranges.Add(new LiveRange(position, position + 1));
+
+         return;
+     }
+
+     LiveRange first = _ranges[0];
+
+     // The new start must not collapse the range into an empty one.
+     Debug.Assert(position != first.End);
+
+     _ranges[0] = new LiveRange(position, first.End);
+ }
+
+ // Returns the start of the first range.
+ // Throws InvalidOperationException when the interval has no ranges.
+ public int GetStart()
+ {
+     if (IsEmpty)
+     {
+         throw new InvalidOperationException("Empty interval.");
+     }
+
+     LiveRange first = _ranges[0];
+
+     return first.Start;
+ }
+
+ // Moves the end of the last range to the given position. An interval
+ // without ranges gets a single one-position range [position, position + 1[.
+ public void SetEnd(int position)
+ {
+     if (_ranges.Count == 0)
+     {
+         _ranges.Add(new LiveRange(position, position + 1));
+
+         return;
+     }
+
+     int tailIndex = _ranges.Count - 1;
+
+     LiveRange tail = _ranges[tailIndex];
+
+     // The new end must not collapse the range into an empty one.
+     Debug.Assert(position != tail.Start);
+
+     _ranges[tailIndex] = new LiveRange(tail.Start, position);
+ }
+
+ // Returns the end of the last range.
+ // Throws InvalidOperationException when the interval has no ranges.
+ public int GetEnd()
+ {
+     if (IsEmpty)
+     {
+         throw new InvalidOperationException("Empty interval.");
+     }
+
+     LiveRange tail = _ranges[_ranges.Count - 1];
+
+     return tail.End;
+ }
+
+ // Adds the range [start, end[ to the interval, merging it with every
+ // existing range it intersects or touches.
+ // Throws ArgumentException when start is not lower than end.
+ public void AddRange(int start, int end)
+ {
+     if (start >= end)
+     {
+         throw new ArgumentException("Invalid range start position " + start + ", " + end);
+     }
+
+     int found = _ranges.BinarySearch(new LiveRange(start, end));
+
+     if (found < 0)
+     {
+         // Nothing intersects the new range; the complement of the search
+         // result is the index where it has to be inserted.
+         InsertRange(~found, start, end);
+
+         return;
+     }
+
+     // The new range intersects with an existing range, so all the
+     // intersecting ranges are removed before the new one is added.
+     // The new range is also extended as needed, based on the bounds of
+     // the existing ranges being removed.
+     int first = found;
+     int last  = found;
+
+     while (first > 0 && _ranges[first - 1].End >= start)
+     {
+         first--;
+     }
+
+     while (last + 1 < _ranges.Count && _ranges[last + 1].Start <= end)
+     {
+         last++;
+     }
+
+     int lowestStart = _ranges[first].Start;
+     int highestEnd  = _ranges[last].End;
+
+     if (lowestStart < start)
+     {
+         start = lowestStart;
+     }
+
+     if (highestEnd > end)
+     {
+         end = highestEnd;
+     }
+
+     _ranges.RemoveRange(first, last - first + 1);
+
+     InsertRange(first, start, end);
+ }
+
+ // Inserts the range [start, end[ at the given index of the ranges list.
+ // Whenever possible an existing range is extended instead:
+ // - when the previous range ends exactly where the new one starts;
+ // - when the next range starts exactly where the new one ends.
+ // When both hold, the previous range is extended over the new range and the
+ // next one, and the next one is removed. There's no specific reason to
+ // prefer the previous range, extending either one would do.
+ private void InsertRange(int index, int start, int end)
+ {
+     bool touchesPrevious = index > 0 && _ranges[index - 1].End == start;
+     bool touchesNext     = index < _ranges.Count && _ranges[index].Start == end;
+
+     if (touchesPrevious)
+     {
+         start = _ranges[index - 1].Start;
+     }
+
+     if (touchesNext)
+     {
+         end = _ranges[index].End;
+     }
+
+     LiveRange merged = new LiveRange(start, end);
+
+     if (touchesPrevious)
+     {
+         if (touchesNext)
+         {
+             // The next range is now covered by the extended previous range.
+             _ranges.RemoveAt(index);
+         }
+
+         _ranges[index - 1] = merged;
+     }
+     else if (touchesNext)
+     {
+         _ranges[index] = merged;
+     }
+     else
+     {
+         _ranges.Insert(index, merged);
+     }
+ }
+
+ // Records a use position; a position that is already present is kept once.
+ public void AddUsePosition(int position) => _usePositions.Add(position);
+
+ // True when one of the ranges of this interval covers the given position.
+ public bool Overlaps(int position)
+ {
+     // A position is looked up as the one-position range [position, position + 1[.
+     LiveRange probe = new LiveRange(position, position + 1);
+
+     int found = _ranges.BinarySearch(probe);
+
+     return found >= 0;
+ }
+
+ // True when any range of the other interval intersects a range of this one.
+ public bool Overlaps(LiveInterval other)
+ {
+     return other._ranges.Any(range => _ranges.BinarySearch(range) >= 0);
+ }
+
+ // Walks the ranges of the other interval in order and, for the first one
+ // that intersects this interval, returns the start of the lowest range of
+ // this interval involved in that intersection. Returns NotFound when the
+ // two intervals never intersect.
+ public int GetOverlapPosition(LiveInterval other)
+ {
+     foreach (LiveRange candidate in other._ranges)
+     {
+         int hit = _ranges.BinarySearch(candidate);
+
+         if (hit < 0)
+         {
+             continue;
+         }
+
+         // Several ranges of this interval may intersect the same candidate.
+         // Step back to the one with the lowest start position, since the
+         // first overlap position is the one being returned.
+         while (hit > 0 && _ranges[hit - 1].End > candidate.Start)
+         {
+             hit--;
+         }
+
+         return _ranges[hit].Start;
+     }
+
+     return NotFound;
+ }
+
+ // Split children registered on this instance, ordered by their key
+ // (the start position each one had when it was added).
+ public IEnumerable<LiveInterval> SplitChilds() => _childs.Values;
+
+ // Use positions of this interval, in ascending order.
+ public IEnumerable<int> UsePositions() => _usePositions;
+
+ // Lowest recorded use position, or NotFound when there is none.
+ public int FirstUse()
+ {
+     return _usePositions.Count != 0 ? _usePositions.Min : NotFound;
+ }
+
+ // Lowest use position that is greater than or equal to the given position
+ // (the position itself counts), or NotFound when there is none.
+ public int NextUseAfter(int position)
+ {
+     foreach (int use in _usePositions)
+     {
+         if (use < position)
+         {
+             continue;
+         }
+
+         return use;
+     }
+
+     return NotFound;
+ }
+
+ // Splits this interval at the given position and returns the new right
+ // hand interval, which is also registered as a split child of the parent.
+ //
+ // After the call this interval keeps everything located before position,
+ // while the returned interval (same Local, same parent) receives every
+ // range and use position located at or after it. A range that strictly
+ // contains the position is cut in two.
+ //
+ // The position must leave both sides non-empty: this is asserted in debug
+ // builds, and registering an empty right interval throws.
+ public LiveInterval Split(int position)
+ {
+     LiveInterval right = new LiveInterval(Local, _parent);
+
+     int splitIndex = 0;
+
+     for (; splitIndex < _ranges.Count; splitIndex++)
+     {
+         LiveRange range = _ranges[splitIndex];
+
+         // Ranges are half-open, so End itself is not part of the range and
+         // only a position strictly inside the range cuts it. Treating
+         // position == End as a cut would hand the zero-length range
+         // [End, End[ to the right interval, giving it a wrong start and
+         // hiding the "right interval is empty" condition checked below.
+         if (position > range.Start && position < range.End)
+         {
+             right._ranges.Add(new LiveRange(position, range.End));
+
+             _ranges[splitIndex++] = new LiveRange(range.Start, position);
+
+             break;
+         }
+
+         // First range that lies entirely at or after the split position.
+         if (range.Start >= position)
+         {
+             break;
+         }
+     }
+
+     // Every remaining range belongs to the right interval.
+     if (splitIndex < _ranges.Count)
+     {
+         int count = _ranges.Count - splitIndex;
+
+         right._ranges.AddRange(_ranges.GetRange(splitIndex, count));
+
+         _ranges.RemoveRange(splitIndex, count);
+     }
+
+     // Use positions follow the ranges: anything at or after the split
+     // position moves to the right interval.
+     foreach (int usePosition in _usePositions.Where(x => x >= position))
+     {
+         right._usePositions.Add(usePosition);
+     }
+
+     _usePositions.RemoveWhere(x => x >= position);
+
+     Debug.Assert(_ranges.Count != 0, "Left interval is empty after split.");
+
+     Debug.Assert(right._ranges.Count != 0, "Right interval is empty after split.");
+
+     AddSplitChild(right);
+
+     return right;
+ }
+
+ // Registers a split child on the parent interval, keyed by the child's
+ // current start position.
+ private void AddSplitChild(LiveInterval child)
+ {
+     Debug.Assert(!child.IsEmpty, "Trying to insert a empty interval.");
+
+     SortedList<int, LiveInterval> siblings = _parent._childs;
+
+     siblings.Add(child.GetStart(), child);
+ }
+
+ // Returns the interval that covers the given position: this interval when
+ // it covers it, otherwise the first split child registered on this instance
+ // that does, or null when none does.
+ public LiveInterval GetSplitChild(int position)
+ {
+     if (Overlaps(position))
+     {
+         return this;
+     }
+
+     IList<LiveInterval> children = _childs.Values;
+
+     for (int index = 0; index < children.Count; index++)
+     {
+         LiveInterval child = children[index];
+
+         if (child.Overlaps(position))
+         {
+             return child;
+         }
+     }
+
+     return null;
+ }
+
+ // Spills this interval to the offset already used by the first spilled
+ // split child of the parent, if there is one. Returns true when such a
+ // child was found, false otherwise (nothing is changed in that case).
+ public bool TrySpillWithSiblingOffset()
+ {
+     LiveInterval spilledSibling = _parent._childs.Values.FirstOrDefault(sibling => sibling.IsSpilled);
+
+     if (spilledSibling == null)
+     {
+         return false;
+     }
+
+     Spill(spilledSibling.SpillOffset);
+
+     return true;
+ }
+
+ // Records the offset this interval is spilled to.
+ public void Spill(int offset) => SpillOffset = offset;
+
+ // Orders intervals by the start of their first range. When either interval
+ // has no ranges, the range counts are compared instead.
+ public int CompareTo(LiveInterval other)
+ {
+     int thisCount  = _ranges.Count;
+     int otherCount = other._ranges.Count;
+
+     if (thisCount != 0 && otherCount != 0)
+     {
+         int thisStart  = _ranges[0].Start;
+         int otherStart = other._ranges[0].Start;
+
+         return thisStart.CompareTo(otherStart);
+     }
+
+     return thisCount.CompareTo(otherCount);
+ }
+
+ // Text form of the interval: its ranges, in list order, joined by "; ".
+ public override string ToString() => string.Join("; ", _ranges);
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
new file mode 100644
index 00000000..b5faeffd
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
@@ -0,0 +1,31 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Half-open range [Start, End[ of positions.
+ struct LiveRange : IComparable<LiveRange>
+ {
+     public int Start { get; }
+     public int End   { get; }
+
+     public LiveRange(int start, int end)
+     {
+         Start = start;
+         End   = end;
+     }
+
+     // Ranges that intersect compare as equal (0); otherwise they are
+     // ordered by their start position.
+     public int CompareTo(LiveRange other)
+     {
+         bool disjoint = Start >= other.End || other.Start >= End;
+
+         return disjoint ? Start.CompareTo(other.Start) : 0;
+     }
+
+     public override string ToString()
+     {
+         return $"[{Start}, {End}[";
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
new file mode 100644
index 00000000..9652224e
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Immutable set of six integer register masks: available, caller saved and
+ // callee saved, each one for integer and for vector registers.
+ struct RegisterMasks
+ {
+     public int IntAvailableRegisters   { get; }
+     public int VecAvailableRegisters   { get; }
+     public int IntCallerSavedRegisters { get; }
+     public int VecCallerSavedRegisters { get; }
+     public int IntCalleeSavedRegisters { get; }
+     public int VecCalleeSavedRegisters { get; }
+
+     public RegisterMasks(
+         int intAvailableRegisters,
+         int vecAvailableRegisters,
+         int intCallerSavedRegisters,
+         int vecCallerSavedRegisters,
+         int intCalleeSavedRegisters,
+         int vecCalleeSavedRegisters)
+     {
+         IntAvailableRegisters   = intAvailableRegisters;
+         VecAvailableRegisters   = vecAvailableRegisters;
+         IntCallerSavedRegisters = intCallerSavedRegisters;
+         VecCallerSavedRegisters = vecCallerSavedRegisters;
+         IntCalleeSavedRegisters = intCalleeSavedRegisters;
+         VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+     }
+
+     // Returns the available registers mask for the given register type.
+     // Throws ArgumentException for anything other than Integer or Vector.
+     public int GetAvailableRegisters(RegisterType type)
+     {
+         switch (type)
+         {
+             case RegisterType.Integer:
+                 return IntAvailableRegisters;
+
+             case RegisterType.Vector:
+                 return VecAvailableRegisters;
+
+             default:
+                 throw new ArgumentException($"Invalid register type \"{type}\".");
+         }
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
new file mode 100644
index 00000000..a6233d6e
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
@@ -0,0 +1,27 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Hands out consecutive offsets: every allocation starts where the previous
+ // one ended, and nothing is ever released.
+ class StackAllocator
+ {
+     // Sum of all the sizes allocated so far, which is also the offset that
+     // the next allocation will receive.
+     private int _offset;
+
+     public int TotalSize
+     {
+         get { return _offset; }
+     }
+
+     // Allocates as many bytes as the operand type reports through GetSizeInBytes.
+     public int Allocate(OperandType type)
+     {
+         int sizeInBytes = type.GetSizeInBytes();
+
+         return Allocate(sizeInBytes);
+     }
+
+     // Reserves sizeInBytes bytes and returns the offset where they start.
+     public int Allocate(int sizeInBytes)
+     {
+         int allocated = _offset;
+
+         _offset = allocated + sizeInBytes;
+
+         return allocated;
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs b/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
new file mode 100644
index 00000000..4955f1b4
--- /dev/null
+++ b/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ // Immutable bundle of three values set once by the constructor: an array of
+ // push entries, a prologue size and a fixed allocation size.
+ // NOTE(review): presumably describes the prologue of a generated function
+ // for stack unwinding; units and exact meaning are not visible here - confirm
+ // against the code that produces and consumes it.
+ struct UnwindInfo
+ {
+ // The array is stored as given, it is not copied.
+ public UnwindPushEntry[] PushEntries { get; }
+
+ public int PrologueSize { get; }
+
+ public int FixedAllocSize { get; }
+
+ public UnwindInfo(UnwindPushEntry[] pushEntries, int prologueSize, int fixedAllocSize)
+ {
+ PushEntries = pushEntries;
+ PrologueSize = prologueSize;
+ FixedAllocSize = fixedAllocSize;
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
new file mode 100644
index 00000000..6597e2b4
--- /dev/null
+++ b/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
@@ -0,0 +1,20 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ // Immutable record made of an index, a register type and a stream end
+ // offset, all set once by the constructor.
+ // NOTE(review): Index and Type presumably identify the register that was
+ // pushed, and StreamEndOffset presumably is a position in the emitted code
+ // stream - confirm against the code that creates these entries.
+ struct UnwindPushEntry
+ {
+ public int Index { get; }
+
+ public RegisterType Type { get; }
+
+ public int StreamEndOffset { get; }
+
+ public UnwindPushEntry(int index, RegisterType type, int streamEndOffset)
+ {
+ Index = index;
+ Type = type;
+ StreamEndOffset = streamEndOffset;
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
new file mode 100644
index 00000000..c6483894
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -0,0 +1,1358 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class Assembler
+ {
+ private const int BadOp = 0; // Sentinel stored in the table for an operand form that has no encoding.
+ private const int OpModRMBits = 24; // NOTE(review): presumably the bit position of the ModRM /digit extension packed into table opcodes (e.g. 0x04000083) - confirm against WriteOpCode.
+
+ private const byte RexPrefix = 0x40; // Base value of the x86-64 REX prefix byte.
+ private const byte RexWPrefix = 0x48; // REX prefix with the W bit set.
+ private const byte LockPrefix = 0xf0; // LOCK prefix byte; written before CMPXCHG16B.
+
+ // Per-instruction encoding attributes stored in the instruction table.
+ // The low bits are independent flags; a two-bit prefix selector sits at PrefixBit.
+ [Flags]
+ private enum InstructionFlags
+ {
+     None = 0x00,
+     RegOnly = 0x01,
+     Reg8Src = 0x02,
+     Reg8Dest = 0x04,
+     RexW = 0x08,
+     Vex = 0x10,
+
+     // PrefixBit is a shift count, not a flag. Its numeric value (16) happens to
+     // equal Vex (0x10), so never test it with a bitwise AND.
+     PrefixBit = 16,
+     // Selector values, going by the member names: 1 = 0x66, 2 = 0xF3, 3 = 0xF2.
+     PrefixMask = 0x3 << PrefixBit,
+     Prefix66 = 0x1 << PrefixBit,
+     PrefixF3 = 0x2 << PrefixBit,
+     PrefixF2 = 0x3 << PrefixBit
+ }
+
+ // One row of the instruction table: an opcode per supported operand form
+ // (BadOp when the form does not exist) plus the encoding flags.
+ private struct InstructionInfo
+ {
+     public int OpRMR { get; }     // "RM/R" column of the table.
+     public int OpRMImm8 { get; }  // "RM/I8" column of the table.
+     public int OpRMImm32 { get; } // "RM/I32" column of the table.
+     public int OpRImm64 { get; }  // "R/I64" column of the table.
+     public int OpRRM { get; }     // "R/RM" column of the table.
+
+     // Prefix / REX.W / VEX / 8-bit register attributes for this instruction.
+     public InstructionFlags Flags { get; }
+
+     // Stores every argument verbatim in the property of the same name.
+     public InstructionInfo(
+         int opRMR,
+         int opRMImm8,
+         int opRMImm32,
+         int opRImm64,
+         int opRRM,
+         InstructionFlags flags)
+     {
+         this.Flags = flags;
+         this.OpRRM = opRRM;
+         this.OpRImm64 = opRImm64;
+         this.OpRMImm32 = opRMImm32;
+         this.OpRMImm8 = opRMImm8;
+         this.OpRMR = opRMR;
+     }
+ }
+
+ private static InstructionInfo[] _instTable; // Encoding table indexed by (int)X86Instruction; built once in the static constructor.
+
+ private Stream _stream; // Output stream supplied to the constructor.
+
+ static Assembler() // Builds the shared encoding table, one row per X86Instruction.
+ {
+ _instTable = new InstructionInfo[(int)X86Instruction.Count]; // Rows never registered below stay default (all opcodes 0 == BadOp).
+
+ // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
+ Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None));
+ Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex));
+ Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None)); // NOTE(review): the high byte (04) looks like the ModRM /digit extension (AND r/m, imm is 83 /4) - see OpModRMBits.
+ Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex));
+ Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None));
+ Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RegOnly));
+ Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None)); // Base opcode; the condition code is OR'ed in by Cmovcc().
+ Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None));
+ Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex));
+ Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
+ Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
+ Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly));
+ Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); // NOTE(review): F2 0F 2C is CVTTSD2SI (truncating) in the Intel SDM; CVTSD2SI is F2 0F 2D - confirm which one callers expect.
+ Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None));
+ Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex));
+ Add(X86Instruction.Divsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstructionFlags.None));
+ Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None));
+ Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None));
+ Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None));
+ Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex));
+ Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex));
+ Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None));
+ Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66));
+ Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66)); // Also reused by Movq() with RexW added.
+ Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex));
+ Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex));
+ Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstructionFlags.None));
+ Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None));
+ Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None));
+ Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None));
+ Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex));
+ Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None));
+ Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66)); // No Vex flag here; the VEX form is the separate Vpblendvb row.
+ Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3));
+ Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None));
+ Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex));
+ Add(X86Instruction.Rcpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex));
+ Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None));
+ Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex));
+ Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Unpckhpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex));
+ Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex));
+ Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
+ Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
+ }
+
+ // Registers the encoding row for one instruction in the shared table.
+ private static void Add(X86Instruction inst, InstructionInfo info) => _instTable[(int)inst] = info;
+
+ // Creates an assembler that writes encoded instructions to the given stream.
+ // Throws ArgumentNullException when stream is null, so the mistake surfaces
+ // here rather than as a NullReferenceException on the first emitted byte.
+ public Assembler(Stream stream)
+ {
+     if (stream == null)
+     {
+         throw new ArgumentNullException(nameof(stream));
+     }
+
+     _stream = stream;
+ }
+
+ // Encodes ADD with the given destination, source and operand type.
+ public void Add(Operand dest, Operand source, OperandType type) =>
+     WriteInstruction(dest, source, type, X86Instruction.Add);
+
+ // Encodes ADDSD with a destination and two sources.
+ public void Addsd(Operand dest, Operand src1, Operand src2) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Addsd);
+
+ // Encodes ADDSS with a destination and two sources.
+ public void Addss(Operand dest, Operand src1, Operand src2) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Addss);
+
+ // Encodes AND with the given destination, source and operand type.
+ public void And(Operand dest, Operand source, OperandType type) =>
+     WriteInstruction(dest, source, type, X86Instruction.And);
+
+ // Encodes BSR with the given destination, source and operand type.
+ public void Bsr(Operand dest, Operand source, OperandType type) =>
+     WriteInstruction(dest, source, type, X86Instruction.Bsr);
+
+ // Encodes BSWAP on dest; the operand's own type is used and no source is passed.
+ public void Bswap(Operand dest) =>
+     WriteInstruction(dest, null, dest.Type, X86Instruction.Bswap);
+
+ // Encodes CALL through the given operand; no source and no operand type are passed.
+ public void Call(Operand dest) =>
+     WriteInstruction(dest, null, OperandType.None, X86Instruction.Call);
+
+ // Writes the single-byte CDQ opcode (0x99).
+ public void Cdq() => WriteByte(0x99);
+
+ // Encodes CMOVcc: the condition code is OR'ed into the base opcode from the table.
+ public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition)
+ {
+     InstructionInfo cmov = _instTable[(int)X86Instruction.Cmovcc];
+
+     int opCode = cmov.OpRRM | (int)condition;
+
+     WriteOpCode(dest, null, source, type, cmov.Flags, opCode, rrm: true);
+ }
+
+ // Encodes CMP of src1 against src2 with the given operand type.
+ public void Cmp(Operand src1, Operand src2, OperandType type) =>
+     WriteInstruction(src1, src2, type, X86Instruction.Cmp);
+
+ // Writes CQO: the REX.W prefix (0x48) followed by opcode 0x99.
+ public void Cqo()
+ {
+     WriteByte(RexWPrefix);
+     WriteByte(0x99);
+ }
+
+ public void Cmpxchg16b(MemoryOperand memOp) // Encodes LOCK CMPXCHG16B on the given memory operand.
+ {
+ WriteByte(LockPrefix); // The LOCK prefix byte must precede the instruction bytes.
+
+ WriteInstruction(memOp, null, OperandType.None, X86Instruction.Cmpxchg16b);
+ }
+
+ // Encodes COMISD on src1 and src2; the middle operand slot is left null.
+ public void Comisd(Operand src1, Operand src2) =>
+     WriteInstruction(src1, null, src2, X86Instruction.Comisd);
+
+ // Encodes COMISS on src1 and src2; the middle operand slot is left null.
+ public void Comiss(Operand src1, Operand src2) =>
+     WriteInstruction(src1, null, src2, X86Instruction.Comiss);
+
+ // Encodes CPUID; the instruction takes no explicit operands.
+ public void Cpuid() =>
+     WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid);
+
+ // Encodes CVTSD2SS with a destination and two sources.
+ public void Cvtsd2ss(Operand dest, Operand src1, Operand src2) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
+
+ // Encodes CVTSI2SD; type is forwarded to WriteInstruction as the trailing argument.
+ public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type);
+
+ // Encodes CVTSI2SS; type is forwarded to WriteInstruction as the trailing argument.
+ public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type);
+
+ // Encodes CVTSS2SD with a destination and two sources.
+ public void Cvtss2sd(Operand dest, Operand src1, Operand src2) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd);
+
+ // Encodes DIV by source; no explicit destination is passed and the
+ // operand type is taken from the source operand itself.
+ public void Div(Operand source) =>
+     WriteInstruction(null, source, source.Type, X86Instruction.Div);
+
+ // Encodes DIVSD with a destination and two sources.
+ public void Divsd(Operand dest, Operand src1, Operand src2) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Divsd);
+
+ // Encodes DIVSS with a destination and two sources.
+ public void Divss(Operand dest, Operand src1, Operand src2) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Divss);
+
+ // Encodes IDIV by source; no explicit destination is passed and the
+ // operand type is taken from the source operand itself.
+ public void Idiv(Operand source) =>
+     WriteInstruction(null, source, source.Type, X86Instruction.Idiv);
+
+ // Encodes the one-operand IMUL (table entry Imul128); no explicit destination
+ // is passed and the operand type is taken from the source operand itself.
+ public void Imul(Operand source) =>
+     WriteInstruction(null, source, source.Type, X86Instruction.Imul128);
+
+ // Encodes the two-operand IMUL. Only a register source is accepted here;
+ // any other operand kind raises ArgumentException.
+ public void Imul(Operand dest, Operand source, OperandType type)
+ {
+     if (source.Kind == OperandKind.Register)
+     {
+         WriteInstruction(dest, source, type, X86Instruction.Imul);
+
+         return;
+     }
+
+     throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+ }
+
+ // Encodes the three-operand IMUL (dest = src1 * constant).
+ // src2 must be a constant; the 8-bit immediate form is preferred and the
+ // 32-bit form is the fallback. Throws ArgumentException when src2 is not a
+ // constant or when its value fits neither immediate form.
+ public void Imul(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+     if (src2.Kind != OperandKind.Constant)
+     {
+         throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\".");
+     }
+
+     InstructionInfo imul = _instTable[(int)X86Instruction.Imul];
+
+     if (IsImm8(src2.Value, src2.Type) && imul.OpRMImm8 != BadOp)
+     {
+         WriteOpCode(dest, null, src1, type, imul.Flags, imul.OpRMImm8, rrm: true);
+         WriteByte(src2.AsByte());
+
+         return;
+     }
+
+     if (IsImm32(src2.Value, src2.Type) && imul.OpRMImm32 != BadOp)
+     {
+         WriteOpCode(dest, null, src1, type, imul.Flags, imul.OpRMImm32, rrm: true);
+         WriteInt32(src2.AsInt32());
+
+         return;
+     }
+
+     throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}.");
+ }
+
+ public void Insertps(Operand dest, Operand src1, Operand src2, byte imm) // Encodes INSERTPS followed by its immediate byte.
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Insertps);
+
+ WriteByte(imm); // The immediate is written right after the instruction bytes.
+ }
+
+ // Writes a conditional jump with the given offset.
+ // Short form (0x70 | cc, 8-bit offset) when the offset fits in a signed byte,
+ // otherwise near form (0x0F, 0x80 | cc, 32-bit offset).
+ // Throws ArgumentOutOfRangeException when the offset does not fit in 32 bits.
+ public void Jcc(X86Condition condition, long offset)
+ {
+     if (ConstFitsOnS8(offset))
+     {
+         WriteByte((byte)(0x70 | (int)condition));
+         WriteByte((byte)offset);
+
+         return;
+     }
+
+     if (!ConstFitsOnS32(offset))
+     {
+         throw new ArgumentOutOfRangeException(nameof(offset));
+     }
+
+     WriteByte(0x0f);
+     WriteByte((byte)(0x80 | (int)condition));
+     WriteInt32((int)offset);
+ }
+
+ // Writes an unconditional jump with the given offset.
+ // Short form (0xEB, 8-bit offset) when the offset fits in a signed byte,
+ // otherwise near form (0xE9, 32-bit offset).
+ // Throws ArgumentOutOfRangeException when the offset does not fit in 32 bits.
+ public void Jmp(long offset)
+ {
+     if (ConstFitsOnS8(offset))
+     {
+         WriteByte(0xeb);
+         WriteByte((byte)offset);
+
+         return;
+     }
+
+     if (!ConstFitsOnS32(offset))
+     {
+         throw new ArgumentOutOfRangeException(nameof(offset));
+     }
+
+     WriteByte(0xe9);
+     WriteInt32((int)offset);
+ }
+
+ // Encodes LEA with the given destination, source and operand type.
+ public void Lea(Operand dest, Operand source, OperandType type) =>
+     WriteInstruction(dest, source, type, X86Instruction.Lea);
+
+ // Encodes MOV with the given destination, source and operand type.
+ public void Mov(Operand dest, Operand source, OperandType type) =>
+     WriteInstruction(dest, source, type, X86Instruction.Mov);
+
+ // Encodes the 16-bit MOV (table entry Mov16); no operand type is passed.
+ public void Mov16(Operand dest, Operand source) =>
+     WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16);
+
+ // Encodes the 8-bit MOV (table entry Mov8); no operand type is passed.
+ public void Mov8(Operand dest, Operand source) =>
+     WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8);
+
+ // Encodes MOVD. When the source is an integer operand or a memory operand the
+ // R/RM opcode is used (rrm: true); otherwise the RM/R opcode is used.
+ public void Movd(Operand dest, Operand source)
+ {
+     InstructionInfo movd = _instTable[(int)X86Instruction.Movd];
+
+     if (!(source.Type.IsInteger() || source.Kind == OperandKind.Memory))
+     {
+         WriteOpCode(dest, null, source, OperandType.None, movd.Flags, movd.OpRMR);
+
+         return;
+     }
+
+     WriteOpCode(dest, null, source, OperandType.None, movd.Flags, movd.OpRRM, rrm: true);
+ }
+
+ // Encodes MOVDQU from source to dest; the middle operand slot is left null.
+ public void Movdqu(Operand dest, Operand source) =>
+     WriteInstruction(dest, null, source, X86Instruction.Movdqu);
+
+ // Encodes MOVHLPS with a destination and two sources.
+ public void Movhlps(Operand dest, Operand src1, Operand src2) =>
+     WriteInstruction(dest, src1, src2, X86Instruction.Movhlps);
+
+ // Encodes X86Instruction.Movlhps for (dest, src1, src2).
+ public void Movlhps(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Movlhps);
+
+ // Encodes a 64-bit move.
+ // When either side is an integer or a memory operand, the Movd table entry is
+ // reused with the RexW flag added; otherwise X86Instruction.Movq is encoded.
+ public void Movq(Operand dest, Operand source)
+ {
+ InstructionInfo movdInfo = _instTable[(int)X86Instruction.Movd];
+
+ InstructionFlags wideFlags = movdInfo.Flags | InstructionFlags.RexW;
+
+ if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, null, source, OperandType.None, wideFlags, movdInfo.OpRRM, rrm: true);
+
+ return;
+ }
+
+ if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, null, source, OperandType.None, wideFlags, movdInfo.OpRMR);
+
+ return;
+ }
+
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq);
+ }
+
+ // Encodes X86Instruction.Movsd for (dest, src1, src2).
+ public void Movsd(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Movsd);
+
+ // Encodes X86Instruction.Movss for (dest, src1, src2).
+ public void Movss(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Movss);
+
+ // Encodes X86Instruction.Movsx16 for the given destination, source and operand type.
+ public void Movsx16(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Movsx16);
+
+ // Encodes X86Instruction.Movsx32 for the given destination, source and operand type.
+ public void Movsx32(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Movsx32);
+
+ // Encodes X86Instruction.Movsx8 for the given destination, source and operand type.
+ public void Movsx8(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Movsx8);
+
+ // Encodes X86Instruction.Movzx16 for the given destination, source and operand type.
+ public void Movzx16(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Movzx16);
+
+ // Encodes X86Instruction.Movzx8 for the given destination, source and operand type.
+ public void Movzx8(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Movzx8);
+
+ // Encodes X86Instruction.Mul128 with no destination operand; the operand type
+ // is taken from the source operand.
+ public void Mul(Operand source)
+ => WriteInstruction(null, source, source.Type, X86Instruction.Mul128);
+
+ // Encodes X86Instruction.Mulsd for (dest, src1, src2).
+ public void Mulsd(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Mulsd);
+
+ // Encodes X86Instruction.Mulss for (dest, src1, src2).
+ public void Mulss(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Mulss);
+
+ // Encodes X86Instruction.Neg on dest (no source operand); the operand type
+ // is taken from dest.
+ public void Neg(Operand dest)
+ => WriteInstruction(dest, null, dest.Type, X86Instruction.Neg);
+
+ // Encodes X86Instruction.Not on dest (no source operand); the operand type
+ // is taken from dest.
+ public void Not(Operand dest)
+ => WriteInstruction(dest, null, dest.Type, X86Instruction.Not);
+
+ // Encodes X86Instruction.Or for the given destination, source and operand type.
+ public void Or(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Or);
+
+ // Encodes X86Instruction.Pcmpeqw for (dest, src1, src2).
+ public void Pcmpeqw(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw);
+
+ // Encodes X86Instruction.Pextrb from source to dest, then appends the immediate byte.
+ public void Pextrb(Operand dest, Operand source, byte imm)
+ => WriteInstruction(X86Instruction.Pextrb, dest, source, imm);
+
+ // Encodes X86Instruction.Pextrd from source to dest, then appends the immediate byte.
+ public void Pextrd(Operand dest, Operand source, byte imm)
+ => WriteInstruction(X86Instruction.Pextrd, dest, source, imm);
+
+ // Encodes X86Instruction.Pextrq from source to dest, then appends the immediate byte.
+ public void Pextrq(Operand dest, Operand source, byte imm)
+ => WriteInstruction(X86Instruction.Pextrq, dest, source, imm);
+
+ // Encodes X86Instruction.Pextrw from source to dest, then appends the immediate byte.
+ public void Pextrw(Operand dest, Operand source, byte imm)
+ => WriteInstruction(X86Instruction.Pextrw, dest, source, imm);
+
+ // Encodes X86Instruction.Pinsrb for (dest, src1, src2), then appends the immediate byte.
+ public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm)
+ => WriteInstruction(X86Instruction.Pinsrb, dest, src1, src2, imm);
+
+ // Encodes X86Instruction.Pinsrd for (dest, src1, src2), then appends the immediate byte.
+ public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm)
+ => WriteInstruction(X86Instruction.Pinsrd, dest, src1, src2, imm);
+
+ // Encodes X86Instruction.Pinsrq for (dest, src1, src2), then appends the immediate byte.
+ public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm)
+ => WriteInstruction(X86Instruction.Pinsrq, dest, src1, src2, imm);
+
+ // Encodes X86Instruction.Pinsrw for (dest, src1, src2), then appends the immediate byte.
+ public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm)
+ => WriteInstruction(X86Instruction.Pinsrw, dest, src1, src2, imm);
+
+ // Encodes a pop into dest.
+ // Register destinations use the compact single-opcode form based on 0x58;
+ // any other operand kind goes through the X86Instruction.Pop table entry.
+ public void Pop(Operand dest)
+ {
+ if (dest.Kind != OperandKind.Register)
+ {
+ WriteInstruction(dest, null, dest.Type, X86Instruction.Pop);
+
+ return;
+ }
+
+ WriteCompactInst(dest, 0x58);
+ }
+
+ // Encodes X86Instruction.Popcnt for the given destination, source and operand type.
+ public void Popcnt(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Popcnt);
+
+ // Encodes X86Instruction.Pshufd from source to dest, then appends the immediate byte.
+ public void Pshufd(Operand dest, Operand source, byte imm)
+ => WriteInstruction(X86Instruction.Pshufd, dest, source, imm);
+
+ // Encodes a push of source.
+ // Register sources use the compact single-opcode form based on 0x50;
+ // any other operand kind goes through the X86Instruction.Push table entry.
+ public void Push(Operand source)
+ {
+ if (source.Kind != OperandKind.Register)
+ {
+ WriteInstruction(null, source, source.Type, X86Instruction.Push);
+
+ return;
+ }
+
+ WriteCompactInst(source, 0x50);
+ }
+
+ // Writes the single-byte return opcode (0xc3).
+ public void Return() => WriteByte(0xc3);
+
+ // Encodes X86Instruction.Ror through the shared shift-instruction path.
+ public void Ror(Operand dest, Operand source, OperandType type)
+ => WriteShiftInst(dest, source, type, X86Instruction.Ror);
+
+ // Encodes X86Instruction.Sar through the shared shift-instruction path.
+ public void Sar(Operand dest, Operand source, OperandType type)
+ => WriteShiftInst(dest, source, type, X86Instruction.Sar);
+
+ // Encodes X86Instruction.Shl through the shared shift-instruction path.
+ public void Shl(Operand dest, Operand source, OperandType type)
+ => WriteShiftInst(dest, source, type, X86Instruction.Shl);
+
+ // Encodes X86Instruction.Shr through the shared shift-instruction path.
+ public void Shr(Operand dest, Operand source, OperandType type)
+ => WriteShiftInst(dest, source, type, X86Instruction.Shr);
+
+ // Encodes a set-on-condition into dest: the condition code is OR'ed into the
+ // OpRRM opcode of the X86Instruction.Setcc table entry.
+ public void Setcc(Operand dest, X86Condition condition)
+ {
+ InstructionInfo setccInfo = _instTable[(int)X86Instruction.Setcc];
+
+ int opCode = setccInfo.OpRRM | (int)condition;
+
+ WriteOpCode(dest, null, null, OperandType.None, setccInfo.Flags, opCode);
+ }
+
+ // Encodes X86Instruction.Sub for the given destination, source and operand type.
+ public void Sub(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Sub);
+
+ // Encodes X86Instruction.Subsd for (dest, src1, src2).
+ public void Subsd(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Subsd);
+
+ // Encodes X86Instruction.Subss for (dest, src1, src2).
+ public void Subss(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Subss);
+
+ // Encodes X86Instruction.Test for the two source operands and operand type.
+ public void Test(Operand src1, Operand src2, OperandType type)
+ => WriteInstruction(src1, src2, type, X86Instruction.Test);
+
+ // Encodes X86Instruction.Xor for the given destination, source and operand type.
+ public void Xor(Operand dest, Operand source, OperandType type)
+ => WriteInstruction(dest, source, type, X86Instruction.Xor);
+
+ // Encodes X86Instruction.Xorps for (dest, src1, src2).
+ public void Xorps(Operand dest, Operand src1, Operand src2)
+ => WriteInstruction(dest, src1, src2, X86Instruction.Xorps);
+
+ // Encodes an arbitrary table instruction with a destination and one source.
+ // The operand type is optional and defaults to OperandType.None.
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand source,
+ OperandType type = OperandType.None)
+ => WriteInstruction(dest, null, source, inst, type);
+
+ // Encodes an arbitrary table instruction with a destination and two sources.
+ // When src2 is a constant, dest and src1 swap places in the underlying call.
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2)
+ {
+ bool swapOperands = src2.Kind == OperandKind.Constant;
+
+ Operand first = swapOperands ? src1 : dest;
+ Operand second = swapOperands ? dest : src1;
+
+ WriteInstruction(first, second, src2, inst);
+ }
+
+ // Encodes an arbitrary table instruction with a destination and one source,
+ // followed by a trailing immediate byte.
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm)
+ {
+ // No first source operand for this form.
+ WriteInstruction(dest, null, source, inst);
+ WriteByte(imm);
+ }
+
+ // Encodes an arbitrary table instruction with a destination and three sources.
+ // The third source is written as a trailing byte holding src3.AsByte() shifted
+ // left by 4.
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ Operand src3)
+ {
+ // 3+ operands can only be encoded with the VEX encoding scheme.
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+ WriteInstruction(dest, src1, src2, inst);
+
+ int trailingByte = src3.AsByte() << 4;
+
+ WriteByte((byte)trailingByte);
+ }
+
+ // Encodes an arbitrary table instruction with a destination and two sources,
+ // followed by a trailing immediate byte.
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ byte imm)
+ {
+ WriteInstruction(dest, src1, src2, inst);
+ WriteByte(imm);
+ }
+
+ // Shared encoder for the shift and rotate instructions.
+ // A register shift count must be RCX (ArgumentException otherwise) and is then
+ // dropped from the encoding by passing a null source; any other source kind is
+ // forwarded unchanged.
+ private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+ bool countInRegister = source.Kind == OperandKind.Register;
+
+ if (countInRegister)
+ {
+ X86Register shiftReg = (X86Register)source.GetRegister().Index;
+
+ if (shiftReg != X86Register.Rcx)
+ {
+ throw new ArgumentException($"Invalid shift register \"{shiftReg}\".");
+ }
+ }
+
+ Operand encodedSource = countInRegister ? null : source;
+
+ WriteInstruction(dest, encodedSource, type, inst);
+ }
+
+ // Core encoder for instructions with a destination and at most one source.
+ // Picks the opcode from the instruction table entry according to the source:
+ // - no source: OpRRM if available, else OpRMR, else ArgumentNullException;
+ // - register source: OpRMR if available, else OpRRM;
+ // - other non-constant source: OpRRM;
+ // - constant source: 8-bit, 16-bit (Mov16 only), 32-bit or 64-bit immediate
+ // form, whichever is the first that applies.
+ // Throws ArgumentException when no encoding applies.
+ private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+ InstructionInfo info = _instTable[(int)inst];
+
+ if (source == null)
+ {
+ if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else if (info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, null, source, type, info.Flags, info.OpRMR);
+ }
+ else
+ {
+ throw new ArgumentNullException(nameof(source));
+ }
+
+ return;
+ }
+
+ if (source.Kind != OperandKind.Constant)
+ {
+ if (source.Kind == OperandKind.Register && info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, null, source, type, info.Flags, info.OpRMR);
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+ }
+
+ return;
+ }
+
+ // From here on the source is a constant; pick the immediate form.
+ ulong imm = source.Value;
+
+ if (inst == X86Instruction.Mov8)
+ {
+ WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm8);
+ WriteByte((byte)imm);
+
+ return;
+ }
+
+ if (inst == X86Instruction.Mov16)
+ {
+ // Mov16 uses the OpRMImm32 opcode slot but only writes 16 bits.
+ WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm32);
+ WriteInt16((short)imm);
+
+ return;
+ }
+
+ if (IsImm8(imm, type) && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm8);
+ WriteByte((byte)imm);
+
+ return;
+ }
+
+ if (IsImm32(imm, type) && info.OpRMImm32 != BadOp)
+ {
+ WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm32);
+ WriteInt32((int)imm);
+
+ return;
+ }
+
+ bool canUseImm64 = dest != null && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp;
+
+ if (!canUseImm64)
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+ }
+
+ int rexPrefix = GetRexPrefix(dest, source, type, rrm: false);
+
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+
+ // The low 3 bits of the destination register are added to the opcode itself.
+ int destLowBits = dest.GetRegister().Index & 0b111;
+
+ WriteByte((byte)(info.OpRImm64 + destLowBits));
+ WriteUInt64(imm);
+ }
+
+ // Core encoder for instructions with a destination and up to two sources.
+ // Picks the opcode from the instruction table entry according to src2:
+ // - no src2: OpRRM if available, else OpRMR, else ArgumentNullException;
+ // - constant src2: OpRMImm8 followed by the immediate byte; the value must fit
+ // on an unsigned byte;
+ // - register src2: OpRMR if available, else OpRRM;
+ // - any other src2: OpRRM.
+ // Throws ArgumentException when no encoding applies.
+ private void WriteInstruction(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ X86Instruction inst,
+ OperandType type = OperandType.None)
+ {
+ InstructionInfo info = _instTable[(int)inst];
+
+ if (src2 == null)
+ {
+ if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else if (info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+ }
+ else
+ {
+ throw new ArgumentNullException(nameof(src2));
+ }
+
+ return;
+ }
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ ulong imm = src2.Value;
+
+ bool fitsOnByte = (byte)imm == imm;
+
+ if (!fitsOnByte || info.OpRMImm8 == BadOp)
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+ }
+
+ WriteOpCode(dest, src1, null, type, info.Flags, info.OpRMImm8);
+ WriteByte((byte)imm);
+
+ return;
+ }
+
+ if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
+ }
+ }
+
+ // Writes the prefixes, opcode bytes, ModRM byte and (when a memory operand is
+ // present) the SIB byte and displacement for a single instruction.
+ //
+ // dest / src2: register or memory operands placed into the ModRM byte; either
+ // may be null. At most one of them is used as the memory operand.
+ // src1: optional extra source, only used by the VEX encoding path.
+ // type: operand type used to compute the REX prefix.
+ // flags: instruction flags (prefix selection, RexW, Vex, RegOnly, Reg8Dest/Src).
+ // opCode: opcode value; the bits at OpModRMBits and above are placed into
+ // ModRM bits 3..5.
+ // rrm: when true, dest goes into ModRM bits 3..5 and src2 into bits 0..2;
+ // when false, the two are swapped.
+ //
+ // Throws ArgumentException for operand kinds other than register or memory,
+ // for a second memory operand, and for RSP used as an index register.
+ private void WriteOpCode(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type,
+ InstructionFlags flags,
+ int opCode,
+ bool rrm = false)
+ {
+ int rexPrefix = GetRexPrefix(dest, src2, type, rrm);
+
+ if ((flags & InstructionFlags.RexW) != 0)
+ {
+ rexPrefix |= RexWPrefix;
+ }
+
+ // Seed ModRM bits 3..5 with the value stored in the upper bits of the opcode.
+ int modRM = (opCode >> OpModRMBits) << 3;
+
+ MemoryOperand memOp = null;
+
+ if (dest != null)
+ {
+ if (dest.Kind == OperandKind.Register)
+ {
+ int regIndex = dest.GetRegister().Index;
+
+ modRM |= (regIndex & 0b111) << (rrm ? 3 : 0);
+
+ // With Reg8Dest, register indices 4 and above force a REX prefix to be emitted.
+ if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (dest.Kind == OperandKind.Memory)
+ {
+ memOp = dest as MemoryOperand;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\".");
+ }
+ }
+
+ if (src2 != null)
+ {
+ if (src2.Kind == OperandKind.Register)
+ {
+ int regIndex = src2.GetRegister().Index;
+
+ modRM |= (regIndex & 0b111) << (rrm ? 0 : 3);
+
+ // With Reg8Src, register indices 4 and above force a REX prefix to be emitted.
+ if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (src2.Kind == OperandKind.Memory && memOp == null)
+ {
+ // Only accepted when the destination was not already a memory operand.
+ memOp = src2 as MemoryOperand;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\".");
+ }
+ }
+
+ bool needsSibByte = false;
+ bool needsDisplacement = false;
+
+ int sib = 0;
+
+ if (memOp != null)
+ {
+ // Either source or destination is a memory operand.
+ Register baseReg = memOp.BaseAddress.GetRegister();
+
+ X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111);
+
+ // A SIB byte is used when there is an index register, or when the low 3 bits
+ // of the base register match RSP. A displacement is always written when the
+ // low 3 bits of the base register match RBP, even if it is zero.
+ needsSibByte = memOp.Index != null || baseRegLow == X86Register.Rsp;
+ needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp;
+
+ if (needsDisplacement)
+ {
+ // 0x40 selects the 8-bit displacement form, 0x80 the 32-bit form
+ // (matching the displacement width written at the end of this method).
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ modRM |= 0x40;
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ modRM |= 0x80;
+ }
+ }
+
+ // Base registers 8 and above put their high bit into REX bit 0.
+ if (baseReg.Index >= 8)
+ {
+ rexPrefix |= RexPrefix | (baseReg.Index >> 3);
+ }
+
+ if (needsSibByte)
+ {
+ // SIB layout built here: base in bits 0..2, index in bits 3..5, scale in bits 6..7.
+ sib = (int)baseRegLow;
+
+ if (memOp.Index != null)
+ {
+ int indexReg = memOp.Index.GetRegister().Index;
+
+ if (indexReg == (int)X86Register.Rsp)
+ {
+ throw new ArgumentException("Using RSP as index register on the memory operand is not allowed.");
+ }
+
+ // Index registers 8 and above put their high bit into REX bit 1.
+ if (indexReg >= 8)
+ {
+ rexPrefix |= RexPrefix | (indexReg >> 3) << 1;
+ }
+
+ sib |= (indexReg & 0b111) << 3;
+ }
+ else
+ {
+ // No index register: the index field is set to 0b100.
+ sib |= 0b100 << 3;
+ }
+
+ sib |= (int)memOp.Scale << 6;
+
+ // ModRM bits 0..2 = 0b100 when a SIB byte follows.
+ modRM |= 0b100;
+ }
+ else
+ {
+ modRM |= (int)baseRegLow;
+ }
+ }
+ else
+ {
+ // Source and destination are registers.
+ modRM |= 0xc0;
+ }
+
+ Debug.Assert(opCode != BadOp, "Invalid opcode value.");
+
+ if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
+ {
+ // VEX path: the prefix selector from the flags goes into the low bits of the
+ // last VEX byte, and the inverted index of src1 (or all ones when there is no
+ // src1) goes into bits 3..6.
+ int vexByte2 = (int)(flags & InstructionFlags.PrefixMask) >> (int)InstructionFlags.PrefixBit;
+
+ if (src1 != null)
+ {
+ vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3;
+ }
+ else
+ {
+ vexByte2 |= 0b1111 << 3;
+ }
+
+ ushort opCodeHigh = (ushort)(opCode >> 8);
+
+ // The two-byte form is only used when REX bits 0, 1 and 3 are all clear and
+ // the opcode's upper bytes are exactly 0x0f.
+ if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf)
+ {
+ // Two-byte form.
+ WriteByte(0xc5);
+
+ // Inverted REX bit 2 goes into bit 7.
+ vexByte2 |= (~rexPrefix & 4) << 5;
+
+ WriteByte((byte)vexByte2);
+ }
+ else
+ {
+ // Three-byte form.
+ WriteByte(0xc4);
+
+ // Inverted REX bits 0..2 go into bits 5..7 of the first VEX byte.
+ int vexByte1 = (~rexPrefix & 7) << 5;
+
+ // The low bits of the first VEX byte select the opcode map from the
+ // opcode's upper bytes.
+ switch (opCodeHigh)
+ {
+ case 0xf: vexByte1 |= 1; break;
+ case 0xf38: vexByte1 |= 2; break;
+ case 0xf3a: vexByte1 |= 3; break;
+
+ default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break;
+ }
+
+ // REX bit 3 goes (not inverted) into bit 7 of the second VEX byte.
+ vexByte2 |= (rexPrefix & 8) << 4;
+
+ WriteByte((byte)vexByte1);
+ WriteByte((byte)vexByte2);
+ }
+
+ // Only the last opcode byte is written after a VEX prefix.
+ opCode &= 0xff;
+ }
+ else
+ {
+ // Non-VEX path: optional 0x66/0xf2/0xf3 prefix, then the REX prefix if any.
+ switch (flags & InstructionFlags.PrefixMask)
+ {
+ case InstructionFlags.Prefix66: WriteByte(0x66); break;
+ case InstructionFlags.PrefixF2: WriteByte(0xf2); break;
+ case InstructionFlags.PrefixF3: WriteByte(0xf3); break;
+ }
+
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+ }
+
+ // RegOnly instructions carry the low 3 bits of the destination register in the opcode.
+ if (dest != null && (flags & InstructionFlags.RegOnly) != 0)
+ {
+ opCode += dest.GetRegister().Index & 7;
+ }
+
+ // Write up to three opcode bytes, most significant first, skipping zero upper bytes.
+ if ((opCode & 0xff0000) != 0)
+ {
+ WriteByte((byte)(opCode >> 16));
+ }
+
+ if ((opCode & 0xff00) != 0)
+ {
+ WriteByte((byte)(opCode >> 8));
+ }
+
+ WriteByte((byte)opCode);
+
+ // RegOnly instructions have no ModRM, SIB or displacement bytes.
+ if ((flags & InstructionFlags.RegOnly) == 0)
+ {
+ WriteByte((byte)modRM);
+
+ if (needsSibByte)
+ {
+ WriteByte((byte)sib);
+ }
+
+ if (needsDisplacement)
+ {
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ WriteByte((byte)memOp.Displacement);
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ WriteInt32(memOp.Displacement);
+ }
+ }
+ }
+ }
+
+ // Writes a single-byte instruction whose register is encoded in the opcode:
+ // the low 3 bits of the register index are added to opCode, and registers
+ // 8 and above are preceded by the prefix byte 0x41.
+ private void WriteCompactInst(Operand operand, int opCode)
+ {
+ int regIndex = operand.GetRegister().Index;
+
+ bool isExtendedRegister = regIndex >= 8;
+
+ if (isExtendedRegister)
+ {
+ WriteByte(0x41);
+ }
+
+ int lowBits = regIndex & 0b111;
+
+ WriteByte((byte)(opCode + lowBits));
+ }
+
+ // Computes the REX prefix value for a dest/source operand pair.
+ // Starts from RexWPrefix for 64-bit operand types (0 otherwise). For each
+ // register operand with index 8 or above, RexPrefix is OR'ed in together with
+ // the register's high bit: at bit 2 for dest and bit 0 for source when rrm is
+ // true, and the other way around when rrm is false.
+ // Returns 0 when no prefix is needed.
+ private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm)
+ {
+ int rexPrefix = Is64Bits(type) ? RexWPrefix : 0;
+
+ if (dest != null && dest.Kind == OperandKind.Register)
+ {
+ int destIndex = dest.GetRegister().Index;
+
+ if (destIndex >= 8)
+ {
+ rexPrefix |= RexPrefix | ((destIndex >> 3) << (rrm ? 2 : 0));
+ }
+ }
+
+ if (source != null && source.Kind == OperandKind.Register)
+ {
+ int sourceIndex = source.GetRegister().Index;
+
+ if (sourceIndex >= 8)
+ {
+ rexPrefix |= RexPrefix | ((sourceIndex >> 3) << (rrm ? 0 : 2));
+ }
+ }
+
+ return rexPrefix;
+ }
+
+ // Returns true for the two 64-bit operand types, I64 and FP64.
+ private static bool Is64Bits(OperandType type)
+ {
+ switch (type)
+ {
+ case OperandType.I64:
+ case OperandType.FP64:
+ return true;
+
+ default:
+ return false;
+ }
+ }
+
+ // Returns true when the immediate fits on a signed 8-bit value.
+ // For I32 the immediate is first truncated to 32 bits and sign-extended;
+ // for every other type it is reinterpreted as a signed 64-bit value.
+ private static bool IsImm8(ulong immediate, OperandType type)
+ {
+ long signedValue;
+
+ if (type == OperandType.I32)
+ {
+ signedValue = (int)immediate;
+ }
+ else
+ {
+ signedValue = (long)immediate;
+ }
+
+ return ConstFitsOnS8(signedValue);
+ }
+
+ // Returns true when the immediate fits on a signed 32-bit value.
+ // For I32 the immediate is first truncated to 32 bits and sign-extended;
+ // for every other type it is reinterpreted as a signed 64-bit value.
+ private static bool IsImm32(ulong immediate, OperandType type)
+ {
+ long signedValue;
+
+ if (type == OperandType.I32)
+ {
+ signedValue = (int)immediate;
+ }
+ else
+ {
+ signedValue = (long)immediate;
+ }
+
+ return ConstFitsOnS32(signedValue);
+ }
+
+ // Returns the size in bytes (2 or 6) of the conditional jump needed for offset.
+ // Negative offsets are first reduced by the candidate instruction size before
+ // the range check; non-negative offsets are checked as-is.
+ // Throws ArgumentOutOfRangeException when the offset does not fit on 32 bits.
+ public static int GetJccLength(long offset)
+ {
+ bool isBackward = offset < 0;
+
+ long shortFormOffset = isBackward ? offset - 2 : offset;
+
+ if (ConstFitsOnS8(shortFormOffset))
+ {
+ return 2;
+ }
+
+ long nearFormOffset = isBackward ? offset - 6 : offset;
+
+ if (ConstFitsOnS32(nearFormOffset))
+ {
+ return 6;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+
+ // Returns the size in bytes (2 or 5) of the unconditional jump needed for offset.
+ // Negative offsets are first reduced by the candidate instruction size before
+ // the range check; non-negative offsets are checked as-is.
+ // Throws ArgumentOutOfRangeException when the offset does not fit on 32 bits.
+ public static int GetJmpLength(long offset)
+ {
+ bool isBackward = offset < 0;
+
+ long shortFormOffset = isBackward ? offset - 2 : offset;
+
+ if (ConstFitsOnS8(shortFormOffset))
+ {
+ return 2;
+ }
+
+ long nearFormOffset = isBackward ? offset - 5 : offset;
+
+ if (ConstFitsOnS32(nearFormOffset))
+ {
+ return 5;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+
+ // Returns true when value is representable as a signed 8-bit integer.
+ private static bool ConstFitsOnS8(long value)
+ {
+ return value >= sbyte.MinValue && value <= sbyte.MaxValue;
+ }
+
+ // Returns true when value is representable as a signed 32-bit integer.
+ private static bool ConstFitsOnS32(long value)
+ {
+ return value >= int.MinValue && value <= int.MaxValue;
+ }
+
+ // Writes a signed 16-bit value by reinterpreting it as unsigned.
+ private void WriteInt16(short value) => WriteUInt16((ushort)value);
+
+ // Writes a signed 32-bit value by reinterpreting it as unsigned.
+ private void WriteInt32(int value) => WriteUInt32((uint)value);
+
+ // Appends a single byte to the output stream.
+ private void WriteByte(byte value) => _stream.WriteByte(value);
+
+ // Writes a 16-bit value to the output stream, least significant byte first.
+ private void WriteUInt16(ushort value)
+ {
+ for (int shift = 0; shift < 16; shift += 8)
+ {
+ _stream.WriteByte((byte)(value >> shift));
+ }
+ }
+
+ // Writes a 32-bit value to the output stream, least significant byte first.
+ private void WriteUInt32(uint value)
+ {
+ for (int shift = 0; shift < 32; shift += 8)
+ {
+ _stream.WriteByte((byte)(value >> shift));
+ }
+ }
+
+ // Writes a 64-bit value to the output stream, least significant byte first.
+ private void WriteUInt64(ulong value)
+ {
+ for (int shift = 0; shift < 64; shift += 8)
+ {
+ _stream.WriteByte((byte)(value >> shift));
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CallConvName.cs b/ARMeilleure/CodeGen/X86/CallConvName.cs
new file mode 100644
index 00000000..be367628
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CallConvName.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ // Names the calling conventions that the x86 code generator distinguishes.
+ enum CallConvName
+ {
+ SystemV = 0,
+ Windows = 1
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CallingConvention.cs b/ARMeilleure/CodeGen/X86/CallingConvention.cs
new file mode 100644
index 00000000..2769fd93
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CallingConvention.cs
@@ -0,0 +1,159 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ // Describes register usage for the calling convention of the current platform
+ // (Windows or System V, see GetCurrentCallConv). Register sets are returned as
+ // bit masks where bit N stands for the register whose X86Register value is N.
+ static class CallingConvention
+ {
+ // One bit for each of the 16 registers of a register file.
+ private const int RegistersMask = 0xffff;
+
+ // Integer argument registers, in argument order, for each calling convention.
+ private static readonly X86Register[] _windowsIntArgRegisters = new X86Register[]
+ {
+ X86Register.Rcx,
+ X86Register.Rdx,
+ X86Register.R8,
+ X86Register.R9
+ };
+
+ private static readonly X86Register[] _systemVIntArgRegisters = new X86Register[]
+ {
+ X86Register.Rdi,
+ X86Register.Rsi,
+ X86Register.Rdx,
+ X86Register.Rcx,
+ X86Register.R8,
+ X86Register.R9
+ };
+
+ // All integer registers except RSP.
+ public static int GetIntAvailableRegisters()
+ {
+ int rspBit = 1 << (int)X86Register.Rsp;
+
+ return RegistersMask & ~rspBit;
+ }
+
+ // All 16 vector registers.
+ public static int GetVecAvailableRegisters() => RegistersMask;
+
+ // Integer registers that a call may clobber. RSI and RDI are only part of
+ // the set when the convention is not Windows.
+ public static int GetIntCallerSavedRegisters()
+ {
+ int mask = (1 << (int)X86Register.Rax) |
+ (1 << (int)X86Register.Rcx) |
+ (1 << (int)X86Register.Rdx) |
+ (1 << (int)X86Register.R8) |
+ (1 << (int)X86Register.R9) |
+ (1 << (int)X86Register.R10) |
+ (1 << (int)X86Register.R11);
+
+ if (GetCurrentCallConv() != CallConvName.Windows)
+ {
+ mask |= (1 << (int)X86Register.Rsi) |
+ (1 << (int)X86Register.Rdi);
+ }
+
+ return mask;
+ }
+
+ // Vector registers that a call may clobber: XMM0-XMM5 on Windows,
+ // every vector register otherwise.
+ public static int GetVecCallerSavedRegisters()
+ {
+ if (GetCurrentCallConv() != CallConvName.Windows)
+ {
+ return RegistersMask;
+ }
+
+ return (1 << (int)X86Register.Xmm0) |
+ (1 << (int)X86Register.Xmm1) |
+ (1 << (int)X86Register.Xmm2) |
+ (1 << (int)X86Register.Xmm3) |
+ (1 << (int)X86Register.Xmm4) |
+ (1 << (int)X86Register.Xmm5);
+ }
+
+ // Complement of the caller saved integer set within the 16 register mask.
+ public static int GetIntCalleeSavedRegisters() => GetIntCallerSavedRegisters() ^ RegistersMask;
+
+ // Complement of the caller saved vector set within the 16 register mask.
+ public static int GetVecCalleeSavedRegisters() => GetVecCallerSavedRegisters() ^ RegistersMask;
+
+ // Always 4, regardless of the current calling convention.
+ public static int GetArgumentsOnRegsCount() => 4;
+
+ // Always 6, regardless of the current calling convention.
+ public static int GetIntArgumentsOnRegsCount() => 6;
+
+ // Always 8, regardless of the current calling convention.
+ public static int GetVecArgumentsOnRegsCount() => 8;
+
+ // Returns the register holding the integer argument at the given position:
+ // RCX, RDX, R8, R9 on Windows; RDI, RSI, RDX, RCX, R8, R9 otherwise.
+ // Throws ArgumentOutOfRangeException for any other index.
+ public static X86Register GetIntArgumentRegister(int index)
+ {
+ X86Register[] registers = GetCurrentCallConv() == CallConvName.Windows
+ ? _windowsIntArgRegisters
+ : _systemVIntArgRegisters;
+
+ // The unsigned comparison also rejects negative indices.
+ if ((uint)index >= (uint)registers.Length)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ return registers[index];
+ }
+
+ // Returns the register holding the vector argument at the given position,
+ // counted from XMM0. 4 registers are available on Windows, 8 otherwise.
+ // Throws ArgumentOutOfRangeException for any other index.
+ public static X86Register GetVecArgumentRegister(int index)
+ {
+ int count = GetCurrentCallConv() == CallConvName.Windows ? 4 : 8;
+
+ // The unsigned cast also rejects negative indices.
+ if (!((uint)index < count))
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ return X86Register.Xmm0 + index;
+ }
+
+ // Register used for integer return values.
+ public static X86Register GetIntReturnRegister() => X86Register.Rax;
+
+ // Register used for the high half of an integer return value.
+ public static X86Register GetIntReturnRegisterHigh() => X86Register.Rdx;
+
+ // Register used for vector return values.
+ public static X86Register GetVecReturnRegister() => X86Register.Xmm0;
+
+ // Windows when running on the Windows OS platform, System V on anything else.
+ public static CallConvName GetCurrentCallConv()
+ {
+ if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ {
+ return CallConvName.Windows;
+ }
+
+ return CallConvName.SystemV;
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs
new file mode 100644
index 00000000..d719b516
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs
@@ -0,0 +1,305 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ // Holds the state needed while generating the code of one function: the output
+ // stream and its assembler, the register allocation result, stack region sizes,
+ // the stream offset of every basic block, and the list of jumps between blocks
+ // whose final encoding is only decided in GetCode.
+ class CodeGenContext
+ {
+ // Number of placeholder bytes written to the stream for each jump recorded
+ // by JumpTo. They are replaced by the real jump instruction in GetCode.
+ private const int ReservedBytesForJump = 1;
+
+ private readonly Stream _stream;
+
+ public int StreamOffset => (int)_stream.Length;
+
+ public AllocationResult AllocResult { get; }
+
+ public Assembler Assembler { get; }
+
+ public BasicBlock CurrBlock { get; private set; }
+
+ public int CallArgsRegionSize { get; }
+ public int XmmSaveRegionSize { get; }
+
+ // Stream position at which each basic block starts, indexed by block index.
+ private readonly long[] _blockOffsets;
+
+ // A jump to a basic block, recorded at the stream position of its placeholder.
+ private struct Jump
+ {
+ public bool IsConditional { get; }
+
+ public X86Condition Condition { get; }
+
+ public BasicBlock Target { get; }
+
+ public long JumpPosition { get; }
+
+ public long RelativeOffset { get; set; }
+
+ public int InstSize { get; set; }
+
+ public Jump(BasicBlock target, long jumpPosition)
+ {
+ IsConditional = false;
+ Condition = 0;
+ Target = target;
+ JumpPosition = jumpPosition;
+
+ RelativeOffset = 0;
+
+ InstSize = 0;
+ }
+
+ public Jump(X86Condition condition, BasicBlock target, long jumpPosition)
+ {
+ IsConditional = true;
+ Condition = condition;
+ Target = target;
+ JumpPosition = jumpPosition;
+
+ RelativeOffset = 0;
+
+ InstSize = 0;
+ }
+ }
+
+ private readonly List<Jump> _jumps;
+
+ // State of the pending near jump started by JumpToNear and completed by JumpHere.
+ private X86Condition _jNearCondition;
+
+ private long _jNearPosition;
+ private int _jNearLength;
+
+ public CodeGenContext(Stream stream, AllocationResult allocResult, int maxCallArgs, int blocksCount)
+ {
+ _stream = stream;
+
+ AllocResult = allocResult;
+
+ Assembler = new Assembler(stream);
+
+ CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize);
+ XmmSaveRegionSize = xmmSaveRegionSize;
+
+ _blockOffsets = new long[blocksCount];
+
+ _jumps = new List<Jump>();
+ }
+
+ // Computes the size of the call arguments region, including the padding needed
+ // to keep the whole frame a multiple of 16 bytes. Also outputs the size of the
+ // save area for the callee saved vector registers that are in use.
+ private int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize)
+ {
+ // We need to add 8 bytes to the total size, as the call to this
+ // function already pushed 8 bytes (the return address).
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;
+
+ xmmSaveRegionSize = BitUtils.CountBits(vecMask) * 16;
+
+ int calleeSaveRegionSize = BitUtils.CountBits(intMask) * 8 + xmmSaveRegionSize + 8;
+
+ int argsCount = maxCallArgs;
+
+ if (argsCount < 0)
+ {
+ // When the function has no calls, argsCount is -1.
+ // In this case, we don't need to allocate the shadow space.
+ argsCount = 0;
+ }
+ else if (argsCount < 4)
+ {
+ // The ABI mandates that the space for at least 4 arguments
+ // is reserved on the stack (this is called shadow space).
+ argsCount = 4;
+ }
+
+ int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize;
+
+ // TODO: Instead of always multiplying by 16 (the largest possible size of a variable,
+ // since a V128 has 16 bytes), we should calculate the exact size consumed by the
+ // arguments passed to the called functions on the stack.
+ int callArgsAndFrameSize = frameSize + argsCount * 16;
+
+ // Ensure that the Stack Pointer will be aligned to 16 bytes.
+ callArgsAndFrameSize = (callArgsAndFrameSize + 0xf) & ~0xf;
+
+ return callArgsAndFrameSize - frameSize;
+ }
+
+ // Records the current stream position as the start of the block and makes it
+ // the current block.
+ public void EnterBlock(BasicBlock block)
+ {
+ _blockOffsets[block.Index] = _stream.Position;
+
+ CurrBlock = block;
+ }
+
+ // Records an unconditional jump to the target block at the current position
+ // and writes the placeholder bytes for it.
+ public void JumpTo(BasicBlock target)
+ {
+ _jumps.Add(new Jump(target, _stream.Position));
+
+ WritePadding(ReservedBytesForJump);
+ }
+
+ // Records a conditional jump to the target block at the current position
+ // and writes the placeholder bytes for it.
+ public void JumpTo(X86Condition condition, BasicBlock target)
+ {
+ _jumps.Add(new Jump(condition, target, _stream.Position));
+
+ WritePadding(ReservedBytesForJump);
+ }
+
+ // Starts a conditional jump whose target is set by a later call to JumpHere.
+ // The stream position is advanced past the space of a jump with offset 0.
+ public void JumpToNear(X86Condition condition)
+ {
+ _jNearCondition = condition;
+ _jNearPosition = _stream.Position;
+ _jNearLength = Assembler.GetJccLength(0);
+
+ _stream.Seek(_jNearLength, SeekOrigin.Current);
+ }
+
+ // Completes the jump started by JumpToNear so that it lands on the current
+ // stream position, then restores the stream position.
+ public void JumpHere()
+ {
+ long currentPosition = _stream.Position;
+
+ _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+
+ long offset = currentPosition - (_jNearPosition + _jNearLength);
+
+ Debug.Assert(_jNearLength == Assembler.GetJccLength(offset), "Relative offset doesn't fit on near jump.");
+
+ Assembler.Jcc(_jNearCondition, offset);
+
+ _stream.Seek(currentPosition, SeekOrigin.Begin);
+ }
+
+ private void WritePadding(int size)
+ {
+ while (size-- > 0)
+ {
+ _stream.WriteByte(0);
+ }
+ }
+
+ // Reads exactly count bytes from the stream, starting at its current position.
+ // Stream.Read is allowed to return fewer bytes than requested, so the read is
+ // repeated until the buffer is full.
+ // Throws EndOfStreamException if the stream ends before count bytes are read.
+ private static byte[] ReadBytes(Stream stream, long count)
+ {
+ byte[] buffer = new byte[count];
+
+ int totalRead = 0;
+
+ while (totalRead < buffer.Length)
+ {
+ int read = stream.Read(buffer, totalRead, buffer.Length - totalRead);
+
+ if (read == 0)
+ {
+ throw new EndOfStreamException("Unexpected end of the code stream.");
+ }
+
+ totalRead += read;
+ }
+
+ return buffer;
+ }
+
+ // Produces the final code: computes the size and relative offset of every
+ // recorded jump, then copies the stream into a new buffer with each jump
+ // placeholder replaced by the encoded jump instruction.
+ public byte[] GetCode()
+ {
+ // Write jump relative offsets.
+ bool modified;
+
+ // The size of one jump changes the offsets of the jumps that cross it,
+ // so this is repeated until no relative offset changes anymore.
+ do
+ {
+ modified = false;
+
+ for (int index = 0; index < _jumps.Count; index++)
+ {
+ Jump jump = _jumps[index];
+
+ long jumpTarget = _blockOffsets[jump.Target.Index];
+
+ long offset = jumpTarget - jump.JumpPosition;
+
+ if (offset < 0)
+ {
+ // Backward jump: account for the size of every earlier jump
+ // located at or after the target.
+ for (int index2 = index - 1; index2 >= 0; index2--)
+ {
+ Jump jump2 = _jumps[index2];
+
+ if (jump2.JumpPosition < jumpTarget)
+ {
+ break;
+ }
+
+ offset -= jump2.InstSize - ReservedBytesForJump;
+ }
+ }
+ else
+ {
+ // Forward jump: account for the size of every later jump
+ // located before the target.
+ for (int index2 = index + 1; index2 < _jumps.Count; index2++)
+ {
+ Jump jump2 = _jumps[index2];
+
+ if (jump2.JumpPosition >= jumpTarget)
+ {
+ break;
+ }
+
+ offset += jump2.InstSize - ReservedBytesForJump;
+ }
+
+ offset -= ReservedBytesForJump;
+ }
+
+ if (jump.IsConditional)
+ {
+ jump.InstSize = Assembler.GetJccLength(offset);
+ }
+ else
+ {
+ jump.InstSize = Assembler.GetJmpLength(offset);
+ }
+
+ // The jump is relative to the next instruction, not the current one.
+ // Since we didn't know the next instruction address when calculating
+ // the offset (as the size of the current jump instruction was not known),
+ // we now need to compensate the offset with the jump instruction size.
+ // It's also worth to note that:
+ // - This is only needed for backward jumps.
+ // - The GetJmpLength and GetJccLength also compensates the offset
+ // internally when computing the jump instruction size.
+ if (offset < 0)
+ {
+ offset -= jump.InstSize;
+ }
+
+ if (jump.RelativeOffset != offset)
+ {
+ modified = true;
+ }
+
+ jump.RelativeOffset = offset;
+
+ // Jump is a struct, so the modified copy has to be stored back.
+ _jumps[index] = jump;
+ }
+ }
+ while (modified);
+
+ // Write the code, ignoring the dummy bytes after jumps, into a new stream.
+ _stream.Seek(0, SeekOrigin.Begin);
+
+ using (MemoryStream codeStream = new MemoryStream())
+ {
+ Assembler assembler = new Assembler(codeStream);
+
+ byte[] buffer;
+
+ for (int index = 0; index < _jumps.Count; index++)
+ {
+ Jump jump = _jumps[index];
+
+ // Copy everything up to the jump, then skip its placeholder bytes.
+ buffer = ReadBytes(_stream, jump.JumpPosition - _stream.Position);
+
+ _stream.Seek(ReservedBytesForJump, SeekOrigin.Current);
+
+ codeStream.Write(buffer);
+
+ if (jump.IsConditional)
+ {
+ assembler.Jcc(jump.Condition, jump.RelativeOffset);
+ }
+ else
+ {
+ assembler.Jmp(jump.RelativeOffset);
+ }
+ }
+
+ // Copy whatever follows the last jump.
+ buffer = ReadBytes(_stream, _stream.Length - _stream.Position);
+
+ codeStream.Write(buffer);
+
+ return codeStream.ToArray();
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
new file mode 100644
index 00000000..ae24b563
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -0,0 +1,1661 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CodeGenerator
+ {
+ // Size constants, in bytes. Their uses are outside the visible part of this file;
+ // presumably a memory page and the stack guard region - TODO confirm against usages.
+ private const int PageSize = 0x1000;
+ private const int StackGuardSize = 0x2000;
+
+ // Code generation handlers, indexed by the integer value of the IR Instruction.
+ // The array is allocated by the static constructor; slots that are never
+ // registered stay null, which GenerateOperation reports as an invalid instruction.
+ private static Action<CodeGenContext, Operation>[] _instTable;
+
+ // Builds the instruction dispatch table: one slot per Instruction enum value,
+ // each registered instruction mapped to the method that emits its x86 code.
+ // Entries are kept in alphabetical order of the instruction name.
+ static CodeGenerator()
+ {
+ _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+ Add(Instruction.Add, GenerateAdd);
+ Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+ Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+ Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+ Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+ Add(Instruction.Branch, GenerateBranch);
+ Add(Instruction.BranchIfFalse, GenerateBranchIfFalse);
+ Add(Instruction.BranchIfTrue, GenerateBranchIfTrue);
+ Add(Instruction.ByteSwap, GenerateByteSwap);
+ Add(Instruction.Call, GenerateCall);
+ Add(Instruction.Clobber, GenerateClobber);
+ Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128);
+ Add(Instruction.CompareEqual, GenerateCompareEqual);
+ Add(Instruction.CompareGreater, GenerateCompareGreater);
+ Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual);
+ Add(Instruction.CompareGreaterOrEqualUI, GenerateCompareGreaterOrEqualUI);
+ Add(Instruction.CompareGreaterUI, GenerateCompareGreaterUI);
+ Add(Instruction.CompareLess, GenerateCompareLess);
+ Add(Instruction.CompareLessOrEqual, GenerateCompareLessOrEqual);
+ Add(Instruction.CompareLessOrEqualUI, GenerateCompareLessOrEqualUI);
+ Add(Instruction.CompareLessUI, GenerateCompareLessUI);
+ Add(Instruction.CompareNotEqual, GenerateCompareNotEqual);
+ Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+ Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+ Add(Instruction.ConvertToFP, GenerateConvertToFP);
+ Add(Instruction.Copy, GenerateCopy);
+ Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+ Add(Instruction.CpuId, GenerateCpuId);
+ Add(Instruction.Divide, GenerateDivide);
+ Add(Instruction.DivideUI, GenerateDivideUI);
+ Add(Instruction.Fill, GenerateFill);
+ Add(Instruction.Load, GenerateLoad);
+ Add(Instruction.Load16, GenerateLoad16);
+ Add(Instruction.Load8, GenerateLoad8);
+ Add(Instruction.Multiply, GenerateMultiply);
+ Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+ Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+ Add(Instruction.Negate, GenerateNegate);
+ Add(Instruction.Return, GenerateReturn);
+ Add(Instruction.RotateRight, GenerateRotateRight);
+ Add(Instruction.ShiftLeft, GenerateShiftLeft);
+ Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+ Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+ Add(Instruction.SignExtend16, GenerateSignExtend16);
+ Add(Instruction.SignExtend32, GenerateSignExtend32);
+ Add(Instruction.SignExtend8, GenerateSignExtend8);
+ Add(Instruction.Spill, GenerateSpill);
+ Add(Instruction.SpillArg, GenerateSpillArg);
+ Add(Instruction.StackAlloc, GenerateStackAlloc);
+ Add(Instruction.Store, GenerateStore);
+ Add(Instruction.Store16, GenerateStore16);
+ Add(Instruction.Store8, GenerateStore8);
+ Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+ Add(Instruction.VectorExtract, GenerateVectorExtract);
+ Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+ Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+ Add(Instruction.VectorInsert, GenerateVectorInsert);
+ Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+ Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+ Add(Instruction.VectorOne, GenerateVectorOne);
+ Add(Instruction.VectorZero, GenerateVectorZero);
+ Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+ Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+ Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+ Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+ Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+ }
+
+ // Registers the handler that generates code for a single IR instruction,
+ // storing it in the dispatch table slot given by the instruction's integer value.
+ private static void Add(Instruction inst, Action<CodeGenContext, Operation> func) => _instTable[(int)inst] = func;
+
+ // Compiles the control flow graph of the given compiler context into machine code.
+ // The passes run in a fixed order: optimization, pre-allocation, register allocation
+ // and finally code generation. Returns the generated code with its unwind info.
+ public static CompiledFunction Generate(CompilerContext cctx)
+ {
+     // Tests a compiler option, reading the options at the moment of the check.
+     bool IsEnabled(CompilerOptions option) => (cctx.Options & option) != 0;
+
+     ControlFlowGraph cfg = cctx.Cfg;
+
+     // Optimization: only performed on SSA form, and only when requested.
+     Logger.StartPass(PassName.Optimization);
+
+     if (IsEnabled(CompilerOptions.SsaForm) && IsEnabled(CompilerOptions.Optimize))
+     {
+         Optimizer.RunPass(cfg);
+     }
+
+     Logger.EndPass(PassName.Optimization, cfg);
+
+     // Pre-allocation: also reports the maximum argument count used by calls.
+     Logger.StartPass(PassName.PreAllocation);
+
+     StackAllocator stackAlloc = new StackAllocator();
+
+     PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+     Logger.EndPass(PassName.PreAllocation, cfg);
+
+     // Register allocation: SSA form is deconstructed first when it was in use.
+     Logger.StartPass(PassName.RegisterAllocation);
+
+     if (IsEnabled(CompilerOptions.SsaForm))
+     {
+         Ssa.Deconstruct(cfg);
+     }
+
+     IRegisterAllocator regAlloc = IsEnabled(CompilerOptions.Lsra)
+         ? (IRegisterAllocator)new LinearScanAllocator()
+         : new HybridAllocator();
+
+     RegisterMasks regMasks = new RegisterMasks(
+         CallingConvention.GetIntAvailableRegisters(),
+         CallingConvention.GetVecAvailableRegisters(),
+         CallingConvention.GetIntCallerSavedRegisters(),
+         CallingConvention.GetVecCallerSavedRegisters(),
+         CallingConvention.GetIntCalleeSavedRegisters(),
+         CallingConvention.GetVecCalleeSavedRegisters());
+
+     AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+     Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+     // Code generation: prologue first, then every operation of every block in order.
+     Logger.StartPass(PassName.CodeGeneration);
+
+     using (MemoryStream stream = new MemoryStream())
+     {
+         CodeGenContext context = new CodeGenContext(stream, allocResult, maxCallArgs, cfg.Blocks.Count);
+
+         UnwindInfo unwindInfo = WritePrologue(context);
+
+         foreach (BasicBlock block in cfg.Blocks)
+         {
+             context.EnterBlock(block);
+
+             foreach (Node node in block.Operations)
+             {
+                 // Nodes that are not operations produce no code.
+                 if (node is Operation operation)
+                 {
+                     GenerateOperation(context, operation);
+                 }
+             }
+         }
+
+         Logger.EndPass(PassName.CodeGeneration);
+
+         return new CompiledFunction(context.GetCode(), unwindInfo);
+     }
+ }
+
+ // Generates code for one IR operation.
+ // Regular instructions are dispatched through the handler table, and an instruction
+ // without a registered handler raises an ArgumentException. "Extended" operations are
+ // intrinsics, and are emitted here according to the type of the intrinsic.
+ private static void GenerateOperation(CodeGenContext context, Operation operation)
+ {
+     if (operation.Instruction != Instruction.Extended)
+     {
+         Action<CodeGenContext, Operation> handler = _instTable[(int)operation.Instruction];
+
+         if (handler == null)
+         {
+             throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+         }
+
+         handler(context, operation);
+
+         return;
+     }
+
+     IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+
+     IntrinsicInfo info = IntrinsicTable.GetInfo(intrinOp.Intrinsic);
+
+     switch (info.Type)
+     {
+         case IntrinsicType.Comis_:
+         {
+             Operand result = operation.Destination;
+             Operand lhs = operation.GetSource(0);
+             Operand rhs = operation.GetSource(1);
+
+             Intrinsic comis = intrinOp.Intrinsic;
+
+             bool isDouble = comis == Intrinsic.X86Comisdeq ||
+                             comis == Intrinsic.X86Comisdge ||
+                             comis == Intrinsic.X86Comisdlt;
+
+             bool isSingle = comis == Intrinsic.X86Comisseq ||
+                             comis == Intrinsic.X86Comissge ||
+                             comis == Intrinsic.X86Comisslt;
+
+             // First the comparison itself...
+             if (isDouble)
+             {
+                 context.Assembler.Comisd(lhs, rhs);
+             }
+             else if (isSingle)
+             {
+                 context.Assembler.Comiss(lhs, rhs);
+             }
+
+             // ...then the flags are turned into a byte on the destination.
+             switch (comis)
+             {
+                 case Intrinsic.X86Comisdeq:
+                 case Intrinsic.X86Comisseq:
+                     context.Assembler.Setcc(result, X86Condition.Equal);
+                     break;
+
+                 case Intrinsic.X86Comisdge:
+                 case Intrinsic.X86Comissge:
+                     context.Assembler.Setcc(result, X86Condition.AboveOrEqual);
+                     break;
+
+                 case Intrinsic.X86Comisdlt:
+                 case Intrinsic.X86Comisslt:
+                     context.Assembler.Setcc(result, X86Condition.Below);
+                     break;
+             }
+
+             // SETcc only writes the low byte, zero extend it to 32-bits.
+             context.Assembler.Movzx8(result, result, OperandType.I32);
+
+             break;
+         }
+
+         case IntrinsicType.PopCount:
+         {
+             Operand result = operation.Destination;
+             Operand value = operation.GetSource(0);
+
+             EnsureSameType(result, value);
+
+             Debug.Assert(result.Type.IsInteger());
+
+             context.Assembler.Popcnt(result, value, result.Type);
+
+             break;
+         }
+
+         case IntrinsicType.Unary:
+         {
+             Operand result = operation.Destination;
+             Operand value = operation.GetSource(0);
+
+             EnsureSameType(result, value);
+
+             Debug.Assert(!result.Type.IsInteger());
+
+             context.Assembler.WriteInstruction(info.Inst, result, value);
+
+             break;
+         }
+
+         case IntrinsicType.UnaryToGpr:
+         {
+             Operand result = operation.Destination;
+             Operand value = operation.GetSource(0);
+
+             // Integer destination, non-integer source.
+             Debug.Assert(result.Type.IsInteger() && !value.Type.IsInteger());
+
+             context.Assembler.WriteInstruction(info.Inst, result, value, result.Type);
+
+             break;
+         }
+
+         case IntrinsicType.Binary:
+         {
+             Operand result = operation.Destination;
+             Operand lhs = operation.GetSource(0);
+             Operand rhs = operation.GetSource(1);
+
+             EnsureSameType(result, lhs);
+
+             // Without VEX encoding, the destination is also the first source.
+             if (!HardwareCapabilities.SupportsVexEncoding)
+             {
+                 EnsureSameReg(result, lhs);
+             }
+
+             Debug.Assert(!result.Type.IsInteger());
+             Debug.Assert(!rhs.Type.IsInteger() || rhs.Kind == OperandKind.Constant);
+
+             context.Assembler.WriteInstruction(info.Inst, result, lhs, rhs);
+
+             break;
+         }
+
+         case IntrinsicType.BinaryImm:
+         {
+             Operand result = operation.Destination;
+             Operand value = operation.GetSource(0);
+             Operand imm = operation.GetSource(1);
+
+             EnsureSameType(result, value);
+
+             // Without VEX encoding, the destination is also the first source.
+             if (!HardwareCapabilities.SupportsVexEncoding)
+             {
+                 EnsureSameReg(result, value);
+             }
+
+             Debug.Assert(!result.Type.IsInteger() && imm.Kind == OperandKind.Constant);
+
+             context.Assembler.WriteInstruction(info.Inst, result, value, imm.AsByte());
+
+             break;
+         }
+
+         case IntrinsicType.Ternary:
+         {
+             Operand result = operation.Destination;
+             Operand first = operation.GetSource(0);
+             Operand second = operation.GetSource(1);
+             Operand third = operation.GetSource(2);
+
+             EnsureSameType(result, first, second, third);
+
+             Debug.Assert(!result.Type.IsInteger());
+
+             bool useVexBlend = info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding;
+
+             if (!useVexBlend)
+             {
+                 EnsureSameReg(result, first);
+
+                 // The third source is not passed to the instruction on this path,
+                 // it is only checked to be on the register with index 0.
+                 Debug.Assert(third.GetRegister().Index == 0);
+
+                 context.Assembler.WriteInstruction(info.Inst, result, first, second);
+             }
+             else
+             {
+                 context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, result, first, second, third);
+             }
+
+             break;
+         }
+
+         case IntrinsicType.TernaryImm:
+         {
+             Operand result = operation.Destination;
+             Operand first = operation.GetSource(0);
+             Operand second = operation.GetSource(1);
+             Operand imm = operation.GetSource(2);
+
+             EnsureSameType(result, first, second);
+
+             // Without VEX encoding, the destination is also the first source.
+             if (!HardwareCapabilities.SupportsVexEncoding)
+             {
+                 EnsureSameReg(result, first);
+             }
+
+             Debug.Assert(!result.Type.IsInteger() && imm.Kind == OperandKind.Constant);
+
+             context.Assembler.WriteInstruction(info.Inst, result, first, second, imm.AsByte());
+
+             break;
+         }
+     }
+ }
+
+ // Emits an addition. Integer destinations use ADD with the destination and the
+ // second source only; FP32 uses ADDSS and any other type uses ADDSD.
+ private static void GenerateAdd(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     ValidateBinOp(result, lhs, rhs);
+
+     if (result.Type.IsInteger())
+     {
+         context.Assembler.Add(result, rhs, result.Type);
+
+         return;
+     }
+
+     if (result.Type == OperandType.FP32)
+     {
+         context.Assembler.Addss(result, lhs, rhs);
+     }
+     else
+     {
+         // Remaining case is expected to be FP64.
+         context.Assembler.Addsd(result, lhs, rhs);
+     }
+ }
+
+ // Emits a bitwise AND of the destination with the second source.
+ // Only integer types are expected here.
+ private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     ValidateBinOp(result, lhs, rhs);
+
+     Debug.Assert(result.Type.IsInteger());
+
+     context.Assembler.And(result, rhs, result.Type);
+ }
+
+ // Emits a bitwise exclusive OR: XOR for integer types, XORPS for everything else.
+ private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     ValidateBinOp(result, lhs, rhs);
+
+     if (!result.Type.IsInteger())
+     {
+         context.Assembler.Xorps(result, lhs, rhs);
+
+         return;
+     }
+
+     context.Assembler.Xor(result, rhs, result.Type);
+ }
+
+ // Emits a bitwise NOT, applied in place on the destination.
+ // Only integer types are expected here.
+ private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     ValidateUnOp(result, value);
+
+     Debug.Assert(result.Type.IsInteger());
+
+     context.Assembler.Not(result);
+ }
+
+ // Emits a bitwise OR of the destination with the second source.
+ // Only integer types are expected here.
+ private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     ValidateBinOp(result, lhs, rhs);
+
+     Debug.Assert(result.Type.IsInteger());
+
+     context.Assembler.Or(result, rhs, result.Type);
+ }
+
+ // Emits an unconditional jump to the branch target of the current block.
+ private static void GenerateBranch(CodeGenContext context, Operation operation) => context.JumpTo(context.CurrBlock.Branch);
+
+ // Emits a jump to the branch target of the current block, taken when the
+ // source is zero (TEST of the value against itself, then jump on "equal").
+ private static void GenerateBranchIfFalse(CodeGenContext context, Operation operation)
+ {
+     Operand condition = operation.GetSource(0);
+
+     context.Assembler.Test(condition, condition, condition.Type);
+
+     context.JumpTo(X86Condition.Equal, context.CurrBlock.Branch);
+ }
+
+ // Emits a jump to the branch target of the current block, taken when the
+ // source is not zero (TEST of the value against itself, then jump on "not equal").
+ private static void GenerateBranchIfTrue(CodeGenContext context, Operation operation)
+ {
+     Operand condition = operation.GetSource(0);
+
+     context.Assembler.Test(condition, condition, condition.Type);
+
+     context.JumpTo(X86Condition.NotEqual, context.CurrBlock.Branch);
+ }
+
+ // Emits a BSWAP, applied in place on the destination.
+ // Only integer types are expected here.
+ private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     ValidateUnOp(result, value);
+
+     Debug.Assert(result.Type.IsInteger());
+
+     context.Assembler.Bswap(result);
+ }
+
+ // Emits a call whose target is the first source of the operation.
+ // No argument handling is done here.
+ private static void GenerateCall(CodeGenContext context, Operation operation) => context.Assembler.Call(operation.GetSource(0));
+
+ // Handler for the Clobber instruction. Intentionally emits nothing.
+ private static void GenerateClobber(CodeGenContext context, Operation operation)
+ {
+ // This is only used to tell the register allocator that a register
+ // is clobbered, so we don't need to produce any code.
+ }
+
+ // Emits a CMPXCHG16B on the memory location addressed by the first source.
+ // The remaining operands of the instruction are not handled here.
+ private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation)
+ {
+     Operand address = operation.GetSource(0);
+
+     context.Assembler.Cmpxchg16b(new MemoryOperand(OperandType.I64, address));
+ }
+
+ // Comparison for equality.
+ private static void GenerateCompareEqual(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.Equal);
+
+ // Comparison using the "greater" condition.
+ private static void GenerateCompareGreater(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.Greater);
+
+ // Comparison using the "greater or equal" condition.
+ private static void GenerateCompareGreaterOrEqual(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.GreaterOrEqual);
+
+ // Comparison using the "above or equal" condition.
+ private static void GenerateCompareGreaterOrEqualUI(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.AboveOrEqual);
+
+ // Comparison using the "above" condition.
+ private static void GenerateCompareGreaterUI(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.Above);
+
+ // Comparison using the "less" condition.
+ private static void GenerateCompareLess(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.Less);
+
+ // Comparison using the "less or equal" condition.
+ private static void GenerateCompareLessOrEqual(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.LessOrEqual);
+
+ // Comparison using the "below or equal" condition.
+ private static void GenerateCompareLessOrEqualUI(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.BelowOrEqual);
+
+ // Comparison using the "below" condition.
+ private static void GenerateCompareLessUI(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.Below);
+
+ // Comparison for inequality.
+ private static void GenerateCompareNotEqual(CodeGenContext context, Operation operation) => GenerateCompare(context, operation, X86Condition.NotEqual);
+
+ // Shared implementation of all the comparison instructions.
+ // Emits CMP on the two sources, SETcc with the given condition on the
+ // destination, and then zero extends the resulting byte to 32-bits.
+ private static void GenerateCompare(CodeGenContext context, Operation operation, X86Condition condition)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     EnsureSameType(lhs, rhs);
+
+     // The result of a comparison is always a 32-bits integer.
+     Debug.Assert(result.Type == OperandType.I32);
+
+     context.Assembler.Cmp(lhs, rhs, lhs.Type);
+     context.Assembler.Setcc(result, condition);
+     context.Assembler.Movzx8(result, result, OperandType.I32);
+ }
+
+ // Emits a conditional select. The first source is the 32-bits condition; the
+ // destination (same register as the third source) is replaced by the second
+ // source through CMOVcc when the condition is not zero.
+ private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand condition = operation.GetSource(0);
+     Operand whenTrue = operation.GetSource(1);
+     Operand whenFalse = operation.GetSource(2);
+
+     EnsureSameReg(result, whenFalse);
+     EnsureSameType(result, whenTrue, whenFalse);
+
+     Debug.Assert(result.Type.IsInteger());
+     Debug.Assert(condition.Type == OperandType.I32);
+
+     context.Assembler.Test(condition, condition, condition.Type);
+     context.Assembler.Cmovcc(result, whenTrue, result.Type, X86Condition.NotEqual);
+ }
+
+ // Converts a 64-bits integer into a 32-bits one, using a 32-bits MOV.
+ private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+ {
+     Operand narrow = operation.Destination;
+     Operand wide = operation.GetSource(0);
+
+     Debug.Assert(narrow.Type == OperandType.I32 && wide.Type == OperandType.I64);
+
+     context.Assembler.Mov(narrow, wide, OperandType.I32);
+ }
+
+ // Converts an integer, or a floating-point value of the other precision,
+ // into FP32 or FP64 depending on the destination type.
+ private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     Debug.Assert(result.Type == OperandType.FP32 || result.Type == OperandType.FP64);
+
+     bool toSingle = result.Type == OperandType.FP32;
+     bool fromInteger = value.Type.IsInteger();
+
+     // A non-integer source must be the floating-point type of the other precision.
+     Debug.Assert(fromInteger || value.Type == (toSingle ? OperandType.FP64 : OperandType.FP32));
+
+     if (fromInteger)
+     {
+         // The destination is zeroed before the integer conversion writes to it.
+         context.Assembler.Xorps(result, result, result);
+
+         if (toSingle)
+         {
+             context.Assembler.Cvtsi2ss(result, result, value, value.Type);
+         }
+         else
+         {
+             context.Assembler.Cvtsi2sd(result, result, value, value.Type);
+         }
+     }
+     else if (toSingle)
+     {
+         // FP64 to FP32.
+         context.Assembler.Cvtsd2ss(result, result, value);
+
+         GenerateZeroUpper96(context, result, result);
+     }
+     else
+     {
+         // FP32 to FP64.
+         context.Assembler.Cvtss2sd(result, result, value);
+
+         GenerateZeroUpper64(context, result, result);
+     }
+ }
+
+ // Copies the source into the destination.
+ // Nothing is emitted when both operands have the same kind and value.
+ private static void GenerateCopy(CodeGenContext context, Operation operation)
+ {
+     Operand target = operation.Destination;
+     Operand origin = operation.GetSource(0);
+
+     EnsureSameType(target, origin);
+
+     // Constants are only supported as source for integer copies.
+     Debug.Assert(target.Type.IsInteger() || origin.Kind != OperandKind.Constant);
+
+     // A move to the same location would be useless.
+     bool sameLocation = target.Kind == origin.Kind && target.Value == origin.Value;
+
+     if (sameLocation)
+     {
+         return;
+     }
+
+     bool zeroIntoRegister = target.Kind == OperandKind.Register &&
+                             origin.Kind == OperandKind.Constant && origin.Value == 0;
+
+     if (zeroIntoRegister)
+     {
+         // "mov reg, 0" is assembled as "xor reg, reg", as the latter is more efficient.
+         context.Assembler.Xor(target, target, OperandType.I32);
+
+         return;
+     }
+
+     if (target.Type.IsInteger())
+     {
+         context.Assembler.Mov(target, origin, target.Type);
+     }
+     else
+     {
+         context.Assembler.Movdqu(target, origin);
+     }
+ }
+
+ // Counts the leading zero bits of an integer, using BSR.
+ private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     EnsureSameType(result, value);
+
+     Debug.Assert(result.Type.IsInteger());
+
+     context.Assembler.Bsr(result, value, result.Type);
+
+     int bitCount;
+
+     if (result.Type == OperandType.I32)
+     {
+         bitCount = 32;
+     }
+     else
+     {
+         bitCount = 64;
+     }
+
+     int lastBitIndex = bitCount - 1;
+
+     // BSR leaves the destination undefined for a zero input, but it sets the
+     // ZF flag. The expected result on that case is the operand size, so the
+     // jump below skips the fix-up move whenever the input was not zero.
+     // The value moved is the operand size with the low bits also set, which
+     // gives exactly the operand size after the final XOR.
+     context.JumpToNear(X86Condition.NotEqual);
+
+     context.Assembler.Mov(result, new Operand(bitCount | lastBitIndex), OperandType.I32);
+
+     context.JumpHere();
+
+     // BSR gives the zero based index of the highest bit set, counted from
+     // the least significant bit, while we want the amount of zero bits on
+     // the high end. XOR with the index of the last bit does the inversion.
+     context.Assembler.Xor(result, new Operand(lastBitIndex), OperandType.I32);
+ }
+
+ // Emits a CPUID instruction. The operands of the operation are not used here.
+ private static void GenerateCpuId(CodeGenContext context, Operation operation) => context.Assembler.Cpuid();
+
+ // Emits a division. Integer divisions take the divisor from the third source
+ // and use CDQ/CQO followed by IDIV; floating-point ones use DIVSS or DIVSD.
+ private static void GenerateDivide(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand numerator = operation.GetSource(0);
+     Operand denominator = operation.GetSource(1);
+
+     if (result.Type.IsInteger())
+     {
+         Operand intDivisor = operation.GetSource(2);
+
+         EnsureSameType(result, intDivisor);
+
+         // Extend the sign before the division, for the 32 or 64-bits variant.
+         if (intDivisor.Type == OperandType.I32)
+         {
+             context.Assembler.Cdq();
+         }
+         else
+         {
+             context.Assembler.Cqo();
+         }
+
+         context.Assembler.Idiv(intDivisor);
+
+         return;
+     }
+
+     ValidateBinOp(result, numerator, denominator);
+
+     if (result.Type == OperandType.FP32)
+     {
+         context.Assembler.Divss(result, numerator, denominator);
+     }
+     else
+     {
+         // Remaining case is expected to be FP64.
+         context.Assembler.Divsd(result, numerator, denominator);
+     }
+ }
+
+ // Emits an unsigned integer division: RDX is cleared with a 32-bits XOR,
+ // and DIV is emitted with the third source as the divisor.
+ private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+ {
+     Operand denominator = operation.GetSource(2);
+
+     Operand highPart = Register(X86Register.Rdx);
+
+     Debug.Assert(denominator.Type.IsInteger());
+
+     context.Assembler.Xor(highPart, highPart, OperandType.I32);
+     context.Assembler.Div(denominator);
+ }
+
+ // Loads the destination from the stack. The location is relative to RSP,
+ // at the constant offset of the operation plus the size of the call arguments region.
+ private static void GenerateFill(CodeGenContext context, Operation operation)
+ {
+     Operand target = operation.Destination;
+     Operand slot = operation.GetSource(0);
+
+     Debug.Assert(slot.Kind == OperandKind.Constant);
+
+     int displacement = slot.AsInt32() + context.CallArgsRegionSize;
+
+     Operand stackPointer = Register(X86Register.Rsp);
+
+     MemoryOperand stackSlot = new MemoryOperand(target.Type, stackPointer, null, Multiplier.x1, displacement);
+
+     GenerateLoad(context, stackSlot, target);
+ }
+
+ // Loads the destination from the memory address given by the first source.
+ // The size of the access is given by the destination type.
+ private static void GenerateLoad(CodeGenContext context, Operation operation)
+ {
+     Operand loaded = operation.Destination;
+     Operand location = Memory(operation.GetSource(0), loaded.Type);
+
+     GenerateLoad(context, location, loaded);
+ }
+
+ // Loads a 16-bits value from memory, zero extending it (MOVZX) into the
+ // integer destination.
+ private static void GenerateLoad16(CodeGenContext context, Operation operation)
+ {
+     Operand loaded = operation.Destination;
+     Operand location = Memory(operation.GetSource(0), loaded.Type);
+
+     Debug.Assert(loaded.Type.IsInteger());
+
+     context.Assembler.Movzx16(loaded, location, loaded.Type);
+ }
+
+ // Loads a 8-bits value from memory, zero extending it (MOVZX) into the
+ // integer destination.
+ private static void GenerateLoad8(CodeGenContext context, Operation operation)
+ {
+     Operand loaded = operation.Destination;
+     Operand location = Memory(operation.GetSource(0), loaded.Type);
+
+     Debug.Assert(loaded.Type.IsInteger());
+
+     context.Assembler.Movzx8(loaded, location, loaded.Type);
+ }
+
+ // Emits a multiplication. Integers use IMUL, with the three operands form
+ // when the second source is a constant; FP32 uses MULSS and other types MULSD.
+ private static void GenerateMultiply(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     bool byConstant = rhs.Kind == OperandKind.Constant;
+
+     // The register check is skipped for constant multipliers, as the
+     // IMUL form used for them has a separate source operand.
+     if (!byConstant)
+     {
+         EnsureSameReg(result, lhs);
+     }
+
+     EnsureSameType(result, lhs, rhs);
+
+     if (!result.Type.IsInteger())
+     {
+         if (result.Type == OperandType.FP32)
+         {
+             context.Assembler.Mulss(result, lhs, rhs);
+         }
+         else
+         {
+             // Remaining case is expected to be FP64.
+             context.Assembler.Mulsd(result, lhs, rhs);
+         }
+
+         return;
+     }
+
+     if (byConstant)
+     {
+         context.Assembler.Imul(result, lhs, rhs, result.Type);
+     }
+     else
+     {
+         context.Assembler.Imul(result, rhs, result.Type);
+     }
+ }
+
+ // Emits the single operand form of IMUL with the second source, which must
+ // be a 64-bits integer. The other operands are not handled here.
+ private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+ {
+     Operand multiplier = operation.GetSource(1);
+
+     Debug.Assert(multiplier.Type == OperandType.I64);
+
+     context.Assembler.Imul(multiplier);
+ }
+
+ // Emits a MUL with the second source, which must be a 64-bits integer.
+ // The other operands are not handled here.
+ private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+ {
+     Operand multiplier = operation.GetSource(1);
+
+     Debug.Assert(multiplier.Type == OperandType.I64);
+
+     context.Assembler.Mul(multiplier);
+ }
+
+ // Emits a NEG, applied in place on the destination.
+ // Only integer types are expected here.
+ private static void GenerateNegate(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     ValidateUnOp(result, value);
+
+     Debug.Assert(result.Type.IsInteger());
+
+     context.Assembler.Neg(result);
+ }
+
+ // Emits a function return: the epilogue is written first, followed by
+ // the return instruction itself.
+ private static void GenerateReturn(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Return();
+ }
+
+ // Emits a ROR of the destination by the amount on the second source.
+ private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+     Operand amount = operation.GetSource(1);
+
+     ValidateShift(result, value, amount);
+
+     context.Assembler.Ror(result, amount, result.Type);
+ }
+
+ // Emits a SHL of the destination by the amount on the second source.
+ private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+     Operand amount = operation.GetSource(1);
+
+     ValidateShift(result, value, amount);
+
+     context.Assembler.Shl(result, amount, result.Type);
+ }
+
+ // Emits a SAR (arithmetic shift right) of the destination by the amount
+ // on the second source.
+ private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+     Operand amount = operation.GetSource(1);
+
+     ValidateShift(result, value, amount);
+
+     context.Assembler.Sar(result, amount, result.Type);
+ }
+
+ // Emits a SHR (logical shift right) of the destination by the amount
+ // on the second source.
+ private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+     Operand amount = operation.GetSource(1);
+
+     ValidateShift(result, value, amount);
+
+     context.Assembler.Shr(result, amount, result.Type);
+ }
+
+ // Sign extends the low 16-bits of the source into the destination (MOVSX).
+ private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+ {
+     Operand extended = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     Debug.Assert(extended.Type.IsInteger() && value.Type.IsInteger());
+
+     context.Assembler.Movsx16(extended, value, extended.Type);
+ }
+
+ // Sign extends the low 32-bits of the source into the destination.
+ private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+ {
+     Operand extended = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     Debug.Assert(extended.Type.IsInteger() && value.Type.IsInteger());
+
+     context.Assembler.Movsx32(extended, value, extended.Type);
+ }
+
+ // Sign extends the low 8-bits of the source into the destination (MOVSX).
+ private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+ {
+     Operand extended = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     Debug.Assert(extended.Type.IsInteger() && value.Type.IsInteger());
+
+     context.Assembler.Movsx8(extended, value, extended.Type);
+ }
+
+ // Spills a value to the stack, with the slot offset based after the call arguments region.
+ private static void GenerateSpill(CodeGenContext context, Operation operation) => GenerateSpill(context, operation, context.CallArgsRegionSize);
+
+ // Spills a value to the stack, with the slot offset used as is (base offset of zero).
+ private static void GenerateSpillArg(CodeGenContext context, Operation operation) => GenerateSpill(context, operation, 0);
+
+ // Stores the second source on the stack. The location is relative to RSP,
+ // at the constant offset on the first source plus the supplied base offset.
+ private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+ {
+     Operand slot = operation.GetSource(0);
+     Operand spilled = operation.GetSource(1);
+
+     Debug.Assert(slot.Kind == OperandKind.Constant);
+
+     int displacement = slot.AsInt32() + baseOffset;
+
+     Operand stackPointer = Register(X86Register.Rsp);
+
+     MemoryOperand stackSlot = new MemoryOperand(spilled.Type, stackPointer, null, Multiplier.x1, displacement);
+
+     GenerateStore(context, stackSlot, spilled);
+ }
+
+ // Puts the address of a stack location on the destination, using LEA.
+ // The location is relative to RSP, at the constant offset of the operation
+ // plus the size of the call arguments region.
+ private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+ {
+     Operand pointer = operation.Destination;
+     Operand slot = operation.GetSource(0);
+
+     Debug.Assert(slot.Kind == OperandKind.Constant);
+
+     int displacement = slot.AsInt32() + context.CallArgsRegionSize;
+
+     Operand stackPointer = Register(X86Register.Rsp);
+
+     MemoryOperand stackSlot = new MemoryOperand(OperandType.I64, stackPointer, null, Multiplier.x1, displacement);
+
+     context.Assembler.Lea(pointer, stackSlot, OperandType.I64);
+ }
+
+ // Stores the second source at the memory address given by the first source.
+ // The size of the access is given by the type of the stored value.
+ private static void GenerateStore(CodeGenContext context, Operation operation)
+ {
+     Operand stored = operation.GetSource(1);
+     Operand location = Memory(operation.GetSource(0), stored.Type);
+
+     GenerateStore(context, location, stored);
+ }
+
+ // Stores the low 16-bits of the second source (an integer) at the memory
+ // address given by the first source.
+ private static void GenerateStore16(CodeGenContext context, Operation operation)
+ {
+     Operand stored = operation.GetSource(1);
+     Operand location = Memory(operation.GetSource(0), stored.Type);
+
+     Debug.Assert(stored.Type.IsInteger());
+
+     context.Assembler.Mov16(location, stored);
+ }
+
+ // Stores the low 8-bits of the second source (an integer) at the memory
+ // address given by the first source.
+ private static void GenerateStore8(CodeGenContext context, Operation operation)
+ {
+     Operand stored = operation.GetSource(1);
+     Operand location = Memory(operation.GetSource(0), stored.Type);
+
+     Debug.Assert(stored.Type.IsInteger());
+
+     context.Assembler.Mov8(location, stored);
+ }
+
+ // Emits a subtraction. Integer destinations use SUB with the destination and
+ // the second source only; FP32 uses SUBSS and any other type uses SUBSD.
+ private static void GenerateSubtract(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand lhs = operation.GetSource(0);
+     Operand rhs = operation.GetSource(1);
+
+     ValidateBinOp(result, lhs, rhs);
+
+     if (result.Type.IsInteger())
+     {
+         context.Assembler.Sub(result, rhs, result.Type);
+
+         return;
+     }
+
+     if (result.Type == OperandType.FP32)
+     {
+         context.Assembler.Subss(result, lhs, rhs);
+     }
+     else
+     {
+         // Remaining case is expected to be FP64.
+         context.Assembler.Subsd(result, lhs, rhs);
+     }
+ }
+
+ // Moves an integer into a vector register: MOVD for 32-bits sources,
+ // MOVQ for the others (expected to be 64-bits).
+ private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+ {
+     Operand vector = operation.Destination;
+     Operand scalar = operation.GetSource(0);
+
+     Debug.Assert(!vector.Type.IsInteger() && scalar.Type.IsInteger());
+
+     bool is32Bits = scalar.Type == OperandType.I32;
+
+     if (!is32Bits)
+     {
+         context.Assembler.Movq(vector, scalar);
+
+         return;
+     }
+
+     context.Assembler.Movd(vector, scalar);
+ }
+
+ // Extracts one element of a vector. The destination type selects the element
+ // size and kind (I32, I64, or floating-point), and the second source is the
+ // constant index of the element.
+ private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+ {
+     Operand value = operation.Destination;
+     Operand vector = operation.GetSource(0);
+     Operand elemIndex = operation.GetSource(1);
+
+     Debug.Assert(vector.Type == OperandType.V128);
+     Debug.Assert(elemIndex.Kind == OperandKind.Constant);
+
+     byte index = elemIndex.AsByte();
+
+     switch (value.Type)
+     {
+         case OperandType.I32:
+         {
+             Debug.Assert(index < 4);
+
+             if (HardwareCapabilities.SupportsSse41)
+             {
+                 context.Assembler.Pextrd(value, vector, index);
+             }
+             else if (index == 0)
+             {
+                 context.Assembler.Movd(value, vector);
+             }
+             else
+             {
+                 // Without SSE4.1, the vector is shuffled with a rotated identity
+                 // mask before the MOVD, and shuffled again with the opposite
+                 // rotation afterwards.
+                 const int identityShuffle = 0b11_10_01_00;
+
+                 int rotateDown = BitUtils.RotateRight(identityShuffle, index * 2, 8);
+                 int rotateBack = BitUtils.RotateRight(identityShuffle, 8 - index * 2, 8);
+
+                 context.Assembler.Pshufd(vector, vector, (byte)rotateDown);
+                 context.Assembler.Movd(value, vector);
+                 context.Assembler.Pshufd(vector, vector, (byte)rotateBack);
+             }
+
+             break;
+         }
+
+         case OperandType.I64:
+         {
+             Debug.Assert(index < 2);
+
+             if (HardwareCapabilities.SupportsSse41)
+             {
+                 context.Assembler.Pextrq(value, vector, index);
+             }
+             else if (index == 0)
+             {
+                 context.Assembler.Movq(value, vector);
+             }
+             else
+             {
+                 // The same shuffle is applied before and after the MOVQ.
+                 const byte swapHalves = 0b01_00_11_10;
+
+                 context.Assembler.Pshufd(vector, vector, swapHalves);
+                 context.Assembler.Movq(value, vector);
+                 context.Assembler.Pshufd(vector, vector, swapHalves);
+             }
+
+             break;
+         }
+
+         default:
+         {
+             // Floating-point types.
+             bool isSingle = value.Type == OperandType.FP32;
+             bool isDouble = value.Type == OperandType.FP64;
+
+             Debug.Assert(index < (isSingle ? 4 : 2));
+
+             bool useMovhlps = (isSingle && index >= 2) || (isDouble && index == 1);
+
+             if (useMovhlps)
+             {
+                 context.Assembler.Movhlps(value, value, vector);
+                 context.Assembler.Movq(value, value);
+             }
+             else
+             {
+                 context.Assembler.Movq(value, vector);
+             }
+
+             if (isSingle)
+             {
+                 // For FP32, a final shuffle is applied; its immediate is 0xfc
+                 // combined with the low bit of the index.
+                 context.Assembler.Pshufd(value, value, (byte)(0xfc | (index & 1)));
+             }
+
+             break;
+         }
+     }
+ }
+
+ // Extracts one 16-bits element of a vector, using PEXTRW. The second source
+ // is the constant index of the element, which must be lower than 8.
+ private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+ {
+     Operand value = operation.Destination;
+     Operand vector = operation.GetSource(0);
+     Operand elemIndex = operation.GetSource(1);
+
+     Debug.Assert(vector.Type == OperandType.V128);
+     Debug.Assert(elemIndex.Kind == OperandKind.Constant);
+
+     byte index = elemIndex.AsByte();
+
+     Debug.Assert(index < 8);
+
+     context.Assembler.Pextrw(value, vector, index);
+ }
+
+ // Copies one byte lane of a V128 vector (source 0) into the destination.
+ // The lane number comes from the constant operand (source 1). Uses PEXTRB
+ // when SSE 4.1 is available, otherwise a PEXTRW based sequence.
+ private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand vector = operation.GetSource(0);
+     Operand laneOp = operation.GetSource(1);
+
+     Debug.Assert(vector.Type == OperandType.V128);
+     Debug.Assert(laneOp.Kind == OperandKind.Constant);
+
+     byte lane = laneOp.AsByte();
+
+     // A 128-bit vector holds sixteen byte lanes.
+     Debug.Assert(lane < 16);
+
+     if (HardwareCapabilities.SupportsSse41)
+     {
+         context.Assembler.Pextrb(result, vector, lane);
+
+         return;
+     }
+
+     // Fallback: fetch the 16-bit word that contains the byte...
+     context.Assembler.Pextrw(result, vector, (byte)(lane >> 1));
+
+     bool wantsHighByte = (lane & 1) != 0;
+
+     // ...then isolate the requested half of that word.
+     if (wantsHighByte)
+     {
+         context.Assembler.Shr(result, new Operand(8), OperandType.I32);
+     }
+     else
+     {
+         context.Assembler.Movzx8(result, result, OperandType.I32);
+     }
+ }
+
+ // Emits code that writes a scalar (source 1) into one lane of a V128 vector
+ // (source 0), producing the result in the destination. The lane number is the
+ // constant operand (source 2). The scalar type (I32, I64, FP32 or FP64) selects
+ // the lane width and the instruction sequence; SSE 4.1 instructions are used
+ // when available, otherwise SSE2-only sequences are emitted.
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); //Vector
+ Operand src2 = operation.GetSource(1); //Value
+ Operand src3 = operation.GetSource(2); //Index
+
+ // Without VEX encoding the destination is expected to alias the first source.
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ // SSE2 fallback for integer inserts: the value is written 16 bits at a time
+ // with PINSRW. "words" is the number of 16-bit words in the value (2 for I32,
+ // 4 for I64, see the callers below). src2 is rotated right by 16 after each
+ // word, so after "words" rotations it has gone through its full width and
+ // holds its original value again.
+ void InsertIntSse2(int words)
+ {
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Movdqu(dest, src1);
+ }
+
+ for (int word = 0; word < words; word++)
+ {
+ // Insert lower 16-bits.
+ context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word));
+
+ // Move next word down.
+ context.Assembler.Ror(src2, new Operand(16), src2.Type);
+ }
+ }
+
+ if (src2.Type == OperandType.I32)
+ {
+ Debug.Assert(index < 4);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrd(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(2);
+ }
+ }
+ else if (src2.Type == OperandType.I64)
+ {
+ Debug.Assert(index < 2);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrq(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(4);
+ }
+ }
+ else if (src2.Type == OperandType.FP32)
+ {
+ Debug.Assert(index < 4);
+
+ if (index != 0)
+ {
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ // The lane number is placed in bits [5:4] of the INSERTPS immediate.
+ context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4));
+ }
+ else
+ {
+ if (src1.GetRegister() == src2.GetRegister())
+ {
+ // Value and vector share a register: start from the identity
+ // shuffle and clear the 2-bit selector of the target lane so
+ // that it reads element 0 (where the scalar lives).
+ int mask = 0b11_10_01_00;
+
+ mask &= ~(0b11 << index * 2);
+
+ context.Assembler.Pshufd(dest, src1, (byte)mask);
+ }
+ else
+ {
+ // mask0 and mask1 are opposite rotations of the identity
+ // shuffle, so applying one after the other restores lane order.
+ int mask0 = 0b11_10_01_00;
+ int mask1 = 0b11_10_01_00;
+
+ mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+ mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
+ context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0.
+ context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0]
+ context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position.
+
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1.
+ }
+ }
+ }
+ }
+ else
+ {
+ // Lane 0 needs no shuffling, a scalar move is enough.
+ context.Assembler.Movss(dest, src1, src2);
+ }
+ }
+ else /* if (src2.Type == OperandType.FP64) */
+ {
+ Debug.Assert(index < 2);
+
+ if (index != 0)
+ {
+ // Upper lane: low 64 bits of src2 go to the high 64 bits of the result.
+ context.Assembler.Movlhps(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.Movsd(dest, src1, src2);
+ }
+ }
+ }
+
+ // Emits PINSRW to write a 16-bit value (source 1) into one lane of a V128
+ // vector (source 0), producing the result in the destination. The lane number
+ // is the constant operand (source 2).
+ private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+ {
+     Operand dest = operation.Destination;
+     Operand src1 = operation.GetSource(0); // Vector
+     Operand src2 = operation.GetSource(1); // Value
+     Operand src3 = operation.GetSource(2); // Index
+
+     // Without VEX encoding the destination is expected to alias the vector source.
+     if (!HardwareCapabilities.SupportsVexEncoding)
+     {
+         EnsureSameReg(dest, src1);
+     }
+
+     Debug.Assert(src1.Type == OperandType.V128);
+     Debug.Assert(src3.Kind == OperandKind.Constant);
+
+     byte index = src3.AsByte();
+
+     // Same lane-range check that GenerateVectorExtract16 performs: a 128-bit
+     // vector has eight 16-bit lanes, and an out-of-range index would otherwise
+     // be silently wrapped by the instruction encoding.
+     Debug.Assert(index < 8);
+
+     context.Assembler.Pinsrw(dest, src1, src2, index);
+ }
+
+ // Emits PINSRB to write an 8-bit value (source 1) into one byte lane of a V128
+ // vector (source 0), producing the result in the destination. The lane number
+ // is the constant operand (source 2). Requires SSE 4.1.
+ private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+ {
+     Operand dest = operation.Destination;
+     Operand src1 = operation.GetSource(0); // Vector
+     Operand src2 = operation.GetSource(1); // Value
+     Operand src3 = operation.GetSource(2); // Index
+
+     // It's not possible to emulate this instruction without
+     // SSE 4.1 support without the use of a temporary register,
+     // so we instead handle that case on the pre-allocator when
+     // SSE 4.1 is not supported on the CPU.
+     Debug.Assert(HardwareCapabilities.SupportsSse41);
+
+     // Without VEX encoding the destination is expected to alias the vector source.
+     if (!HardwareCapabilities.SupportsVexEncoding)
+     {
+         EnsureSameReg(dest, src1);
+     }
+
+     Debug.Assert(src1.Type == OperandType.V128);
+     Debug.Assert(src3.Kind == OperandKind.Constant);
+
+     byte index = src3.AsByte();
+
+     // Same lane-range check that GenerateVectorExtract8 performs: a 128-bit
+     // vector has sixteen byte lanes.
+     Debug.Assert(index < 16);
+
+     context.Assembler.Pinsrb(dest, src1, src2, index);
+ }
+
+ // Fills the (non-integer) destination register with all-one bits by comparing
+ // the register with itself for equality.
+ private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+ {
+     Operand target = operation.Destination;
+
+     bool isIntegerDest = target.Type.IsInteger();
+
+     Debug.Assert(!isIntegerDest);
+
+     context.Assembler.Pcmpeqw(target, target, target);
+ }
+
+ // Clears the (non-integer) destination register by XOR-ing it with itself.
+ private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+ {
+     Operand target = operation.Destination;
+
+     bool isIntegerDest = target.Type.IsInteger();
+
+     Debug.Assert(!isIntegerDest);
+
+     context.Assembler.Xorps(target, target, target);
+ }
+
+ // IR entry point for the "zero upper 64 bits" vector operation: checks that
+ // both operands are V128 and forwards to GenerateZeroUpper64.
+ private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand vector = operation.GetSource(0);
+
+     bool bothAreVectors = result.Type == OperandType.V128 && vector.Type == OperandType.V128;
+
+     Debug.Assert(bothAreVectors);
+
+     GenerateZeroUpper64(context, result, vector);
+ }
+
+ // IR entry point for the "zero upper 96 bits" vector operation: checks that
+ // both operands are V128 and forwards to GenerateZeroUpper96.
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand vector = operation.GetSource(0);
+
+     bool bothAreVectors = result.Type == OperandType.V128 && vector.Type == OperandType.V128;
+
+     Debug.Assert(bothAreVectors);
+
+     GenerateZeroUpper96(context, result, vector);
+ }
+
+ // Zero-extends the low 16 bits of the integer source into the integer
+ // destination using MOVZX.
+ private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     bool bothAreIntegers = result.Type.IsInteger() && value.Type.IsInteger();
+
+     Debug.Assert(bothAreIntegers);
+
+     context.Assembler.Movzx16(result, value, OperandType.I32);
+ }
+
+ // Zero-extends the low 32 bits of the integer source into the integer
+ // destination. A plain 32-bit MOV is emitted; this relies on x86-64 clearing
+ // the upper half of the destination register on 32-bit writes.
+ private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     bool bothAreIntegers = result.Type.IsInteger() && value.Type.IsInteger();
+
+     Debug.Assert(bothAreIntegers);
+
+     context.Assembler.Mov(result, value, OperandType.I32);
+ }
+
+ // Zero-extends the low 8 bits of the integer source into the integer
+ // destination using MOVZX.
+ private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+ {
+     Operand result = operation.Destination;
+     Operand value = operation.GetSource(0);
+
+     bool bothAreIntegers = result.Type.IsInteger() && value.Type.IsInteger();
+
+     Debug.Assert(bothAreIntegers);
+
+     context.Assembler.Movzx8(result, value, OperandType.I32);
+ }
+
+ // Emits a load from "address" into "value", picking the move instruction
+ // that matches the type of "value". Unknown types trip a debug assertion and
+ // emit nothing.
+ private static void GenerateLoad(CodeGenContext context, Operand address, Operand value)
+ {
+     OperandType type = value.Type;
+
+     if (type == OperandType.I32 || type == OperandType.I64)
+     {
+         // Integer loads use a general purpose MOV of the matching width.
+         context.Assembler.Mov(value, address, type);
+     }
+     else if (type == OperandType.FP32)
+     {
+         context.Assembler.Movd(value, address);
+     }
+     else if (type == OperandType.FP64)
+     {
+         context.Assembler.Movq(value, address);
+     }
+     else if (type == OperandType.V128)
+     {
+         context.Assembler.Movdqu(value, address);
+     }
+     else
+     {
+         Debug.Assert(false);
+     }
+ }
+
+ // Emits a store of "value" to "address", picking the move instruction that
+ // matches the type of "value". Unknown types trip a debug assertion and emit
+ // nothing.
+ private static void GenerateStore(CodeGenContext context, Operand address, Operand value)
+ {
+     OperandType type = value.Type;
+
+     if (type == OperandType.I32 || type == OperandType.I64)
+     {
+         // Integer stores use a general purpose MOV of the matching width.
+         context.Assembler.Mov(address, value, type);
+     }
+     else if (type == OperandType.FP32)
+     {
+         context.Assembler.Movd(address, value);
+     }
+     else if (type == OperandType.FP64)
+     {
+         context.Assembler.Movq(address, value);
+     }
+     else if (type == OperandType.V128)
+     {
+         context.Assembler.Movdqu(address, value);
+     }
+     else
+     {
+         Debug.Assert(false);
+     }
+ }
+
+ // Copies the low 64 bits of "source" into "dest" with MOVQ, which leaves the
+ // upper 64 bits of "dest" zeroed.
+ private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source)
+ {
+     Assembler assembler = context.Assembler;
+
+     assembler.Movq(dest, source);
+ }
+
+ // Keeps only the low 32 bits of "source" in "dest": MOVQ clears the upper
+ // 64 bits, then PSHUFD replaces lane 1 with a copy of the (zero) lane 3.
+ private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source)
+ {
+     // Selectors, high to low: lane 3, lane 3, lane 3, lane 0.
+     const byte keepLane0Only = 0xfc;
+
+     context.Assembler.Movq(dest, source);
+     context.Assembler.Pshufd(dest, dest, keepLane0Only);
+ }
+
+ // Sanity check for unary operations, compiled only when DEBUG is defined:
+ // destination and source must refer to the same register/memory location
+ // (see EnsureSameReg for the VEX exemption) and have the same type.
+ private static void ValidateUnOp(Operand dest, Operand source)
+ {
+#if DEBUG
+ EnsureSameReg (dest, source);
+ EnsureSameType(dest, source);
+#endif
+ }
+
+ // Sanity check for binary operations, compiled only when DEBUG is defined:
+ // destination and first source must refer to the same location (see
+ // EnsureSameReg for the VEX exemption), and all three operands must have the
+ // same type.
+ private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+ {
+#if DEBUG
+ EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1, src2);
+#endif
+ }
+
+ // Sanity check for shift operations, compiled only when DEBUG is defined:
+ // destination and shifted value must refer to the same location and have the
+ // same integer type, and the shift amount (src2) must be an I32.
+ private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+ {
+#if DEBUG
+ EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+#endif
+ }
+
+ // Asserts that both operands are registers or memory operands of the same
+ // kind with the same value. The check is skipped for non-integer operands
+ // when VEX encoding is in use.
+ private static void EnsureSameReg(Operand op1, Operand op2)
+ {
+     if (op1.Type.IsInteger() || !HardwareCapabilities.SupportsVexEncoding)
+     {
+         bool isRegOrMem = op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory;
+
+         Debug.Assert(isRegOrMem);
+         Debug.Assert(op1.Kind == op2.Kind);
+         Debug.Assert(op1.Value == op2.Value);
+     }
+ }
+
+ // Asserts that the two operands have the same operand type.
+ private static void EnsureSameType(Operand op1, Operand op2)
+ {
+     OperandType expected = op1.Type;
+
+     Debug.Assert(expected == op2.Type);
+ }
+
+ // Asserts that the second and third operands have the same type as the first.
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+ {
+     OperandType expected = op1.Type;
+
+     Debug.Assert(expected == op2.Type);
+     Debug.Assert(expected == op3.Type);
+ }
+
+ // Asserts that the second, third and fourth operands have the same type as
+ // the first.
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+ {
+     OperandType expected = op1.Type;
+
+     Debug.Assert(expected == op2.Type);
+     Debug.Assert(expected == op3.Type);
+     Debug.Assert(expected == op4.Type);
+ }
+
+ // Emits the function prologue and returns the unwind information describing it.
+ // Steps, in emission order:
+ //  1. Push every callee saved integer register that the allocator reports as used.
+ //  2. Probe the stack pages if the reserved region is at least StackGuardSize.
+ //  3. Reserve the stack region (call arguments + spill area + XMM save area).
+ //  4. Store every used callee saved vector register into the top of that region.
+ // WriteEpilogue undoes these steps, so both must be kept in sync.
+ private static UnwindInfo WritePrologue(CodeGenContext context)
+ {
+ List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>();
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ // Only registers that are both callee saved and actually used need saving.
+ int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+ // Push from the lowest register index up; the epilogue pops in the reverse order.
+ while (mask != 0)
+ {
+ int bit = BitUtils.LowestBitSet(mask);
+
+ context.Assembler.Push(Register((X86Register)bit));
+
+ // The stream offset is read right after the push has been emitted.
+ pushEntries.Add(new UnwindPushEntry(bit, RegisterType.Integer, context.StreamOffset));
+
+ mask &= ~(1 << bit);
+ }
+
+ int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+ reservedStackSize += context.XmmSaveRegionSize;
+
+ // Large frames need their pages touched before RSP is moved
+ // (see GenerateInlineStackProbe).
+ if (reservedStackSize >= StackGuardSize)
+ {
+ GenerateInlineStackProbe(context, reservedStackSize);
+ }
+
+ if (reservedStackSize != 0)
+ {
+ context.Assembler.Sub(rsp, new Operand(reservedStackSize), OperandType.I64);
+ }
+
+ // Vector registers are saved at descending 16-byte slots starting from
+ // the top of the reserved region.
+ int offset = reservedStackSize;
+
+ mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitUtils.LowestBitSet(mask);
+
+ offset -= 16;
+
+ MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);
+
+ context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));
+
+ pushEntries.Add(new UnwindPushEntry(bit, RegisterType.Vector, context.StreamOffset));
+
+ mask &= ~(1 << bit);
+ }
+
+ // The stream offset passed here is the position right after the last prologue instruction.
+ return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset, reservedStackSize);
+ }
+
+ // Emits the function epilogue, undoing WritePrologue: reloads the used callee
+ // saved vector registers from the reserved stack region, releases that region,
+ // then pops the used callee saved integer registers.
+ private static void WriteEpilogue(CodeGenContext context)
+ {
+ Operand rsp = Register(X86Register.Rsp);
+
+ // Must compute the same size as WritePrologue so that the offsets match.
+ int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+ reservedStackSize += context.XmmSaveRegionSize;
+
+ int offset = reservedStackSize;
+
+ int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ // Same iteration order as the prologue, so each register gets the slot it was saved to.
+ while (mask != 0)
+ {
+ int bit = BitUtils.LowestBitSet(mask);
+
+ offset -= 16;
+
+ MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);
+
+ context.Assembler.Movdqu(Xmm((X86Register)bit), memOp);
+
+ mask &= ~(1 << bit);
+ }
+
+ if (reservedStackSize != 0)
+ {
+ context.Assembler.Add(rsp, new Operand(reservedStackSize), OperandType.I64);
+ }
+
+ mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+ // The prologue pushed from the lowest register index up, so pop from the
+ // highest index down.
+ while (mask != 0)
+ {
+ int bit = BitUtils.HighestBitSet(mask);
+
+ context.Assembler.Pop(Register((X86Register)bit));
+
+ mask &= ~(1 << bit);
+ }
+ }
+
+ // Emits one dummy 32-bit read per page below RSP so that the OS maps the
+ // stack pages the function is about to use.
+ private static void GenerateInlineStackProbe(CodeGenContext context, int size)
+ {
+     // Windows does lazy stack allocation, and there are just 2
+     // guard pages on the end of the stack. So, if the allocation
+     // size we make is greater than this guard size, we must ensure
+     // that the OS will map all pages that we'll use. We do that by
+     // doing a dummy read on those pages, forcing a page fault and
+     // the OS to map them. If they are already mapped, nothing happens.
+     const int pageMask = PageSize - 1;
+
+     // Round the size up to a whole number of pages.
+     int alignedSize = (size + pageMask) & ~pageMask;
+     int pageCount = alignedSize / PageSize;
+
+     Operand stackPointer = Register(X86Register.Rsp);
+     Operand scratch = Register(CallingConvention.GetIntReturnRegister());
+
+     // Page 0 is skipped, and the loop stops one page short of the aligned size.
+     for (int page = 1; page < pageCount; page++)
+     {
+         Operand probeSlot = new MemoryOperand(OperandType.I32, stackPointer, null, Multiplier.x1, -(page * PageSize));
+
+         context.Assembler.Mov(scratch, probeSlot, OperandType.I32);
+     }
+ }
+
+ // Returns the operand itself when it already is a memory operand; otherwise
+ // wraps it in a new MemoryOperand of the given type.
+ private static MemoryOperand Memory(Operand operand, OperandType type)
+ {
+     bool isAlreadyMemory = operand.Kind == OperandKind.Memory;
+
+     return isAlreadyMemory
+         ? operand as MemoryOperand
+         : new MemoryOperand(type, operand);
+ }
+
+ // Builds an integer register operand for the given x86 register (I64 unless
+ // another type is requested).
+ private static Operand Register(X86Register register, OperandType type = OperandType.I64)
+     => new Operand((int)register, RegisterType.Integer, type);
+
+ // Builds a V128 vector register operand for the given register index.
+ private static Operand Xmm(X86Register register)
+     => new Operand((int)register, RegisterType.Vector, OperandType.V128);
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
new file mode 100644
index 00000000..7f930d6b
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -0,0 +1,52 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ // Exposes the CPU features available on the host. The feature word is read
+ // once, in the static constructor, by compiling and running a tiny function
+ // that returns the result of the CpuId IR operation.
+ static class HardwareCapabilities
+ {
+     private delegate ulong GetFeatureInfo();
+
+     private static ulong _featureInfo;
+
+     // Flags located in the low 32 bits of the feature word.
+     // NOTE(review): presumably the ECX output of CPUID leaf 1; confirm against the CpuId emission.
+     public static bool SupportsSse3 => HasFeature(0);
+     public static bool SupportsPclmulqdq => HasFeature(1);
+     public static bool SupportsSsse3 => HasFeature(9);
+     public static bool SupportsFma => HasFeature(12);
+     public static bool SupportsCx16 => HasFeature(13);
+     public static bool SupportsSse41 => HasFeature(19);
+     public static bool SupportsSse42 => HasFeature(20);
+     public static bool SupportsPopcnt => HasFeature(23);
+     public static bool SupportsAesni => HasFeature(25);
+     public static bool SupportsAvx => HasFeature(28);
+     public static bool SupportsF16c => HasFeature(29);
+
+     // Flags located in the high 32 bits of the feature word.
+     // NOTE(review): presumably the EDX output of CPUID leaf 1; confirm against the CpuId emission.
+     public static bool SupportsSse => HasFeature(32 + 25);
+     public static bool SupportsSse2 => HasFeature(32 + 26);
+
+     // When set, VEX encoded instructions are not used even if AVX is available.
+     public static bool ForceLegacySse { get; set; }
+
+     public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;
+
+     static HardwareCapabilities()
+     {
+         EmitterContext emitter = new EmitterContext();
+
+         // The generated function just returns the CpuId result.
+         emitter.Return(emitter.CpuId());
+
+         GetFeatureInfo readFeatures = Compiler.Compile<GetFeatureInfo>(
+             emitter.GetControlFlowGraph(),
+             new OperandType[0],
+             OperandType.I64,
+             CompilerOptions.HighCq);
+
+         _featureInfo = readFeatures();
+     }
+
+     // Tests a single bit of the cached feature word.
+     private static bool HasFeature(int bit)
+     {
+         return (_featureInfo & (1UL << bit)) != 0;
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
new file mode 100644
index 00000000..b1af352b
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ // Immutable pair describing how an intrinsic is emitted: the x86 instruction
+ // to use and the operand form (IntrinsicType) it takes.
+ struct IntrinsicInfo
+ {
+     // Instruction emitted for the intrinsic.
+     public X86Instruction Inst { get; }
+
+     // Operand form of the intrinsic.
+     public IntrinsicType Type { get; }
+
+     public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
+     {
+         Type = type;
+         Inst = inst;
+     }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
new file mode 100644
index 00000000..e225f254
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -0,0 +1,160 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ // Lookup table from IR intrinsics to the x86 instruction that implements them
+ // and the operand form that instruction takes. The table is an array indexed
+ // by the numeric value of the Intrinsic enum and is filled once by the static
+ // constructor.
+ static class IntrinsicTable
+ {
+ // NOTE(review): not referenced anywhere inside this class.
+ private const int BadOp = 0;
+
+ private static IntrinsicInfo[] _intrinTable;
+
+ static IntrinsicTable()
+ {
+ // One slot per Intrinsic enum member; slots that are never registered
+ // below keep the default IntrinsicInfo value.
+ _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
+ Add(Intrinsic.X86Addpd, new IntrinsicInfo(X86Instruction.Addpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Cmppd, new IntrinsicInfo(X86Instruction.Cmppd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpps, new IntrinsicInfo(X86Instruction.Cmpps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpsd, new IntrinsicInfo(X86Instruction.Cmpsd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpss, new IntrinsicInfo(X86Instruction.Cmpss, IntrinsicType.TernaryImm));
+ // The eq/ge/lt comparison variants share one instruction per precision.
+ Add(Intrinsic.X86Comisdeq, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisdge, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisdlt, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtpd2ps, new IntrinsicInfo(X86Instruction.Cvtpd2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtps2dq, new IntrinsicInfo(X86Instruction.Cvtps2dq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divsd, new IntrinsicInfo(X86Instruction.Divsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxss, new IntrinsicInfo(X86Instruction.Maxss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minpd, new IntrinsicInfo(X86Instruction.Minpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minps, new IntrinsicInfo(X86Instruction.Minps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minsd, new IntrinsicInfo(X86Instruction.Minsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minss, new IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pavgw, new IntrinsicInfo(X86Instruction.Pavgw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pblendvb, new IntrinsicInfo(X86Instruction.Pblendvb, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Pcmpeqb, new IntrinsicInfo(X86Instruction.Pcmpeqb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqd, new IntrinsicInfo(X86Instruction.Pcmpeqd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqq, new IntrinsicInfo(X86Instruction.Pcmpeqq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqw, new IntrinsicInfo(X86Instruction.Pcmpeqw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtb, new IntrinsicInfo(X86Instruction.Pcmpgtb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtd, new IntrinsicInfo(X86Instruction.Pcmpgtd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtq, new IntrinsicInfo(X86Instruction.Pcmpgtq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtw, new IntrinsicInfo(X86Instruction.Pcmpgtw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsb, new IntrinsicInfo(X86Instruction.Pmaxsb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsd, new IntrinsicInfo(X86Instruction.Pmaxsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsw, new IntrinsicInfo(X86Instruction.Pmaxsw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxub, new IntrinsicInfo(X86Instruction.Pmaxub, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxud, new IntrinsicInfo(X86Instruction.Pmaxud, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxuw, new IntrinsicInfo(X86Instruction.Pmaxuw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsb, new IntrinsicInfo(X86Instruction.Pminsb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsd, new IntrinsicInfo(X86Instruction.Pminsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsw, new IntrinsicInfo(X86Instruction.Pminsw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminub, new IntrinsicInfo(X86Instruction.Pminub, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminud, new IntrinsicInfo(X86Instruction.Pminud, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminuw, new IntrinsicInfo(X86Instruction.Pminuw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmovsxbw, new IntrinsicInfo(X86Instruction.Pmovsxbw, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovsxdq, new IntrinsicInfo(X86Instruction.Pmovsxdq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovsxwd, new IntrinsicInfo(X86Instruction.Pmovsxwd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxbw, new IntrinsicInfo(X86Instruction.Pmovzxbw, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxdq, new IntrinsicInfo(X86Instruction.Pmovzxdq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxwd, new IntrinsicInfo(X86Instruction.Pmovzxwd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmulld, new IntrinsicInfo(X86Instruction.Pmulld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmullw, new IntrinsicInfo(X86Instruction.Pmullw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount));
+ Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psllw, new IntrinsicInfo(X86Instruction.Psllw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrad, new IntrinsicInfo(X86Instruction.Psrad, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psraw, new IntrinsicInfo(X86Instruction.Psraw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrld, new IntrinsicInfo(X86Instruction.Psrld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrlq, new IntrinsicInfo(X86Instruction.Psrlq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrldq, new IntrinsicInfo(X86Instruction.Psrldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrlw, new IntrinsicInfo(X86Instruction.Psrlw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubb, new IntrinsicInfo(X86Instruction.Psubb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubd, new IntrinsicInfo(X86Instruction.Psubd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubq, new IntrinsicInfo(X86Instruction.Psubq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubw, new IntrinsicInfo(X86Instruction.Psubw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhbw, new IntrinsicInfo(X86Instruction.Punpckhbw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhdq, new IntrinsicInfo(X86Instruction.Punpckhdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhwd, new IntrinsicInfo(X86Instruction.Punpckhwd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklbw, new IntrinsicInfo(X86Instruction.Punpcklbw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckldq, new IntrinsicInfo(X86Instruction.Punpckldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklwd, new IntrinsicInfo(X86Instruction.Punpcklwd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pxor, new IntrinsicInfo(X86Instruction.Pxor, IntrinsicType.Binary));
+ Add(Intrinsic.X86Rcpps, new IntrinsicInfo(X86Instruction.Rcpps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Rcpss, new IntrinsicInfo(X86Instruction.Rcpss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Roundpd, new IntrinsicInfo(X86Instruction.Roundpd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundps, new IntrinsicInfo(X86Instruction.Roundps, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundsd, new IntrinsicInfo(X86Instruction.Roundsd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subss, new IntrinsicInfo(X86Instruction.Subss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpckhpd, new IntrinsicInfo(X86Instruction.Unpckhpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
+ }
+
+ // Registers (or overwrites) the table entry for the given intrinsic.
+ private static void Add(Intrinsic intrin, IntrinsicInfo info)
+ {
+ _intrinTable[(int)intrin] = info;
+ }
+
+ // Returns the table entry for the given intrinsic. No check is made that
+ // the intrinsic was registered.
+ public static IntrinsicInfo GetInfo(Intrinsic intrin)
+ {
+ return _intrinTable[(int)intrin];
+ }
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
new file mode 100644
index 00000000..4e9b33e1
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ // Operand form of an x86 intrinsic, stored alongside the instruction in
+ // IntrinsicInfo. Member order defines the numeric values, so new members
+ // should not be inserted between existing ones without checking the users.
+ enum IntrinsicType
+ {
+ Comis_,
+ PopCount,
+ Unary,
+ UnaryToGpr,
+ Binary,
+ BinaryImm,
+ Ternary,
+ TernaryImm
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
new file mode 100644
index 00000000..a1490131
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -0,0 +1,1280 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ using LLNode = LinkedListNode<Node>;
+
+ static class PreAllocator
+ {
+ // Walks every operation of every basic block in the function and rewrites
+ // it so that x86 operand constraints are respected: constants are
+ // materialized where they can't be encoded, destination/source 1 are made
+ // the same operand where required, fixed registers are introduced, and
+ // calls, argument loads and returns are lowered to the current calling
+ // convention.
+ //
+ // cctx: compilation context; provides the CFG that is rewritten in place.
+ // stackAlloc: stack allocator, only forwarded to the Windows call lowering.
+ // maxCallArgs: receives the largest argument count of any call found
+ // (plus one for calls with a V128 destination), or -1 if there is no call.
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ CallConvName callConv = CallingConvention.GetCurrentCallConv();
+
+ // One slot per register argument; filled lazily by the LoadArgument and
+ // Return handlers with the local that holds a copy of the incoming register.
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ foreach (BasicBlock block in cctx.Cfg.Blocks)
+ {
+ LLNode nextNode;
+
+ for (LLNode node = block.Operations.First; node != null; node = nextNode)
+ {
+ // The successor is captured before any handler runs, because handlers
+ // insert nodes around (and sometimes remove) the current node. As a
+ // consequence, nodes inserted by the handlers are not visited by this loop.
+ nextNode = node.Next;
+
+ if (!(node.Value is Operation operation))
+ {
+ continue;
+ }
+
+ HandleConstantCopy(node, operation);
+
+ HandleSameDestSrc1Copy(node, operation);
+
+ HandleFixedRegisterCopy(node, operation);
+
+ switch (operation.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+ // On windows, when a struct is returned from the call,
+ // we also need to pass the pointer where the struct
+ // should be written on the first argument.
+ // Note that the extra slot below is counted for both calling conventions.
+ int argsCount = operation.SourcesCount - 1;
+
+ if (operation.Destination != null && operation.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ if (callConv == CallConvName.Windows)
+ {
+ node = HandleCallWindowsAbi(stackAlloc, node, operation);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ node = HandleCallSystemVAbi(node, operation);
+ }
+ break;
+
+ case Instruction.ConvertToFPUI:
+ HandleConvertToFPUI(node, operation);
+ break;
+
+ case Instruction.LoadArgument:
+ if (callConv == CallConvName.Windows)
+ {
+ HandleLoadArgumentWindowsAbi(cctx, node, preservedArgs, operation);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ HandleLoadArgumentSystemVAbi(cctx, node, preservedArgs, operation);
+ }
+ break;
+
+ case Instruction.Negate:
+ // Only non-integer negation needs rewriting.
+ if (!operation.GetSource(0).Type.IsInteger())
+ {
+ node = HandleNegate(node, operation);
+ }
+ break;
+
+ case Instruction.Return:
+ if (callConv == CallConvName.Windows)
+ {
+ HandleReturnWindowsAbi(cctx, node, preservedArgs, operation);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ HandleReturnSystemVAbi(node, operation);
+ }
+ break;
+
+ case Instruction.VectorInsert8:
+ // Without SSE 4.1 the byte insert is emulated with 16-bits operations.
+ if (!HardwareCapabilities.SupportsSse41)
+ {
+ node = HandleVectorInsert8(node, operation);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ // Replaces constant sources (only the first two sources are inspected)
+ // that the instruction can't take directly with a local that is loaded
+ // by a copy inserted before the node.
+ // Operations without sources and intrinsic (Extended) operations are left
+ // untouched.
+ private static void HandleConstantCopy(LLNode node, Operation operation)
+ {
+ if (operation.SourcesCount == 0 || IsIntrinsic(operation.Instruction))
+ {
+ return;
+ }
+
+ Instruction inst = operation.Instruction;
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+ // - Insert a copy from the GPR to a XMM register.
+ // - Replace the constant use with the XMM register.
+ src1 = AddXmmCopy(node, src1);
+
+ operation.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(inst))
+ {
+ // Handle integer types.
+ // Most ALU instructions accepts a 32-bits immediate on the second operand.
+ // We need to ensure the following:
+ // - If the constant is on operand 1, we need to move it.
+ // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+ // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+ // - If the constant is on operand 2, we check if the instruction supports it,
+ // if not, we also add a copy. 64-bits constants are usually not supported.
+ if (IsCommutative(inst))
+ {
+ src2 = operation.GetSource(1);
+
+ Operand temp = src1;
+
+ src1 = src2;
+ src2 = temp;
+
+ operation.SetSource(0, src1);
+ operation.SetSource(1, src2);
+ }
+
+ // Still a constant here if there was no swap, or if both operands
+ // were constants.
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddCopy(node, src1);
+
+ operation.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (operation.SourcesCount < 2)
+ {
+ return;
+ }
+
+ // Re-read the second source, as it may have been swapped above.
+ src2 = operation.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddXmmCopy(node, src2);
+
+ operation.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst) || IsLongConst(src2))
+ {
+ // Either the instruction takes no immediate on the second source, or
+ // the value does not fit on a signed 32-bits immediate.
+ src2 = AddCopy(node, src2);
+
+ operation.SetSource(1, src2);
+ }
+ }
+ }
+
+ // Rewrites operations whose x86 encoding uses implicit, fixed registers.
+ // Copies into the fixed registers are inserted before the node, copies of
+ // the results out of the fixed registers are inserted after it, and the
+ // sources/destinations of the operation are replaced by those registers.
+ // Returns the last node that was inserted after the operation, or the
+ // original node if nothing was inserted after it.
+ private static LLNode HandleFixedRegisterCopy(LLNode node, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ LinkedList<Node> nodes = node.List;
+
+ switch (operation.Instruction)
+ {
+ case Instruction.CompareAndSwap128:
+ {
+ // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
+ // - The expected value should be in RDX:RAX.
+ // - The new value to be written should be in RCX:RBX.
+ // - The value at the memory location is loaded to RDX:RAX.
+ // Extracts element 0 of the vector into lr and element 1 into hr.
+ void SplitOperand(Operand source, Operand lr, Operand hr)
+ {
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0)));
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
+ }
+
+ Operand rax = Gpr(X86Register.Rax, OperandType.I64);
+ Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+ Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+
+ SplitOperand(operation.GetSource(1), rax, rdx);
+ SplitOperand(operation.GetSource(2), rbx, rcx);
+
+ // After the operation, rebuild the vector result from RDX:RAX.
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax));
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+
+ operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
+
+ break;
+ }
+
+ case Instruction.CpuId:
+ {
+ // Handle the many restrictions of the CPU Id instruction:
+ // - EAX controls the information returned by this instruction.
+ // - When EAX is 1, feature information is returned.
+ // - The information is written to registers EAX, EBX, ECX and EDX.
+ Debug.Assert(dest.Type == OperandType.I64);
+
+ Operand eax = Gpr(X86Register.Rax, OperandType.I32);
+ Operand ebx = Gpr(X86Register.Rbx, OperandType.I32);
+ Operand ecx = Gpr(X86Register.Rcx, OperandType.I32);
+ Operand edx = Gpr(X86Register.Rdx, OperandType.I32);
+
+ // Value 0x01 = Version, family and feature information.
+ nodes.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1)));
+
+ // Copy results to the destination register.
+ // The values are split into 2 32-bits registers, we merge them
+ // into a single 64-bits register.
+ // The result is (EDX << 32) | RCX.
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx));
+ node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32)));
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx));
+
+ operation.SetDestinations(new Operand[] { eax, ebx, ecx, edx });
+
+ operation.SetSources(new Operand[] { eax });
+
+ break;
+ }
+
+ case Instruction.Divide:
+ case Instruction.DivideUI:
+ {
+ // Handle the many restrictions of the division instructions:
+ // - The dividend is always in RDX:RAX.
+ // - The result is always in RAX.
+ // - Additionally it also writes the remainder in RDX.
+ // Only integer divisions are rewritten here.
+ if (dest.Type.IsInteger())
+ {
+ Operand src1 = operation.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
+ nodes.AddBefore(node, new Operation(Instruction.Clobber, rdx));
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+
+ operation.SetSources(new Operand[] { rdx, rax, operation.GetSource(1) });
+
+ // NOTE(review): Destination is assigned after SetDestinations; whether
+ // this replaces the two-element destination list set above depends on
+ // the Destination setter, which is not visible here - confirm.
+ operation.Destination = rax;
+ }
+
+ break;
+ }
+
+ case Instruction.Extended:
+ {
+ IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+
+ // PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
+ if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding)
+ {
+ Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2)));
+
+ operation.SetSource(2, xmm0);
+ }
+
+ break;
+ }
+
+ case Instruction.Multiply64HighSI:
+ case Instruction.Multiply64HighUI:
+ {
+ // Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
+ // - The multiplicand is always in RAX.
+ // - The lower 64-bits of the result is always in RAX.
+ // - The higher 64-bits of the result is always in RDX.
+ Operand src1 = operation.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
+
+ operation.SetSource(0, rax);
+
+ // Only the high half (RDX) is copied to the original destination.
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rdx));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+
+ break;
+ }
+
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ {
+ // The shift register is always implied to be CL (low 8-bits of RCX or ECX).
+ // Shift amounts that are not local variables are left as they are.
+ if (operation.GetSource(1).Kind == OperandKind.LocalVariable)
+ {
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, rcx, operation.GetSource(1)));
+
+ operation.SetSource(1, rcx);
+ }
+
+ break;
+ }
+ }
+
+ return node;
+ }
+
+ // Ensures that, for operations that require it (see IsSameOperandDestSrc1),
+ // the destination and the first source are the same operand, by copying
+ // the first source into the destination (or into a fresh local) before
+ // the operation. ConditionalSelect gets the same treatment, but for its
+ // third source.
+ // Returns the last node inserted after the operation, or the original
+ // node if nothing was inserted after it.
+ private static LLNode HandleSameDestSrc1Copy(LLNode node, Operation operation)
+ {
+ if (operation.Destination == null || operation.SourcesCount == 0)
+ {
+ return node;
+ }
+
+ Instruction inst = operation.Instruction;
+
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+
+ LinkedList<Node> nodes = node.List;
+
+ // The multiply instruction (that maps to IMUL) is somewhat special, it has
+ // a three operand form where the second source is a immediate value.
+ bool threeOperandForm = inst == Instruction.Multiply && operation.GetSource(1).Kind == OperandKind.Constant;
+
+ if (IsSameOperandDestSrc1(operation) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
+ {
+ bool useNewLocal = false;
+
+ // Check whether the destination is also read by any source other than the first.
+ for (int srcIndex = 1; srcIndex < operation.SourcesCount; srcIndex++)
+ {
+ if (operation.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+ // Dest is being used as some source already, we need to use a new
+ // local to store the temporary value, otherwise the value on dest
+ // local would be overwritten.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, temp, src1));
+
+ operation.SetSource(0, temp);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, temp));
+
+ operation.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, new Operation(Instruction.Copy, dest, src1));
+
+ operation.SetSource(0, dest);
+ }
+ }
+ else if (inst == Instruction.ConditionalSelect)
+ {
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ // Here the third source is the one that must share the destination operand.
+ if (src1 == dest || src2 == dest)
+ {
+ // Dest is read by one of the other sources, so go through a temporary.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, temp, src3));
+
+ operation.SetSource(2, temp);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, temp));
+
+ operation.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, new Operation(Instruction.Copy, dest, src3));
+
+ operation.SetSource(2, dest);
+ }
+ }
+
+ return node;
+ }
+
+ // Replaces an unsigned integer to FP conversion with an equivalent
+ // sequence built on the signed conversion (ConvertToFP). The replacement
+ // operations are inserted after the original node, which is then deleted.
+ // Returns the last inserted node.
+ private static LLNode HandleConvertToFPUI(LLNode node, Operation operation)
+ {
+ // Unsigned integer to FP conversions are not supported on X86.
+ // We need to turn them into signed integer to FP conversions, and
+ // adjust the final result.
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");
+
+ LinkedList<Node> nodes = node.List;
+
+ // Remember the original node, "node" is advanced as operations are appended.
+ LLNode currentNode = node;
+
+ if (source.Type == OperandType.I32)
+ {
+ // For 32-bits integers, we can just zero-extend to 64-bits,
+ // and then use the 64-bits signed conversion instructions.
+ Operand zex = Local(OperandType.I64);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, zex, source));
+ node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, dest, zex));
+ }
+ else /* if (source.Type == OperandType.I64) */
+ {
+ // For 64-bits integers, we need to do the following:
+ // - Ensure that the integer has the most significant bit clear.
+ // -- This can be done by shifting the value right by 1, that is, dividing by 2.
+ // -- The least significant bit is lost in this case though.
+ // - We can then convert the shifted value with a signed integer instruction.
+ // - The result still needs to be corrected after that.
+ // -- First, we need to multiply the result by 2, as we divided it by 2 before.
+ // --- This can be done efficiently by adding the result to itself.
+ // -- Then, we need to add the least significant bit that was shifted out.
+ // --- We can convert the least significant bit to float, and add it to the result.
+ Operand lsb = Local(OperandType.I64);
+ Operand half = Local(OperandType.I64);
+
+ Operand lsbF = Local(dest.Type);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, lsb, source));
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, half, source));
+
+ // lsb = source & 1, half = source >> 1 (unsigned shift).
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L)));
+ node = nodes.AddAfter(node, new Operation(Instruction.ShiftRightUI, half, half, Const(1)));
+
+ node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, lsbF, lsb));
+ node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, dest, half));
+
+ // dest = dest + dest + lsbF.
+ node = nodes.AddAfter(node, new Operation(Instruction.Add, dest, dest, dest));
+ node = nodes.AddAfter(node, new Operation(Instruction.Add, dest, dest, lsbF));
+ }
+
+ Delete(currentNode, operation);
+
+ return node;
+ }
+
+ // Lowers a FP32/FP64 negation to a sign bit flip, since SSE has no FP
+ // negate instruction: an all-ones vector is shifted left so that only the
+ // most significant bit of each element stays set, and the value is XORed
+ // with that mask (which also turns 0 into -0).
+ // The replacement sequence is inserted after the original node, which is
+ // then deleted. Returns the last inserted node.
+ private static LLNode HandleNegate(LLNode node, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand value = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 ||
+ dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");
+
+ LinkedList<Node> list = node.List;
+
+ LLNode original = node;
+
+ Operand mask = Local(dest.Type);
+
+ // Pick the shift that matches the element size: 32-bits elements for
+ // FP32, 64-bits elements otherwise (FP64).
+ bool isSingle = dest.Type == OperandType.FP32;
+
+ Intrinsic shiftIntrin = isSingle ? Intrinsic.X86Pslld : Intrinsic.X86Psllq;
+
+ int signBit = isSingle ? 31 : 63;
+
+ LLNode last = list.AddAfter(node, new Operation(Instruction.VectorOne, mask));
+
+ last = list.AddAfter(last, new IntrinsicOperation(shiftIntrin, mask, mask, Const(signBit)));
+ last = list.AddAfter(last, new IntrinsicOperation(Intrinsic.X86Xorps, mask, mask, value));
+ last = list.AddAfter(last, new Operation(Instruction.Copy, dest, mask));
+
+ Delete(original, operation);
+
+ return last;
+ }
+
+ // Emulates a byte insertion into a vector with 16-bits extract/insert
+ // operations: the 16-bits element that contains the target byte is
+ // extracted, the byte is merged into it, and the element is inserted back.
+ // The replacement sequence is inserted after the original node, which is
+ // then deleted. Returns the last inserted node.
+ private static LLNode HandleVectorInsert8(LLNode node, Operation operation)
+ {
+ // Handle vector insertion, when SSE 4.1 is not supported.
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ Debug.Assert(index < 16);
+
+ LinkedList<Node> nodes = node.List;
+
+ // Remember the original node, "node" is advanced as operations are appended.
+ LLNode currentNode = node;
+
+ // temp1 holds the 16-bits element, temp2 a copy of the value to insert.
+ Operand temp1 = Local(OperandType.I32);
+ Operand temp2 = Local(OperandType.I32);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, temp2, src2));
+
+ // The 16-bits element index is half the byte index.
+ Operation vextOp = new Operation(Instruction.VectorExtract16, temp1, src1, Const(index >> 1));
+
+ node = nodes.AddAfter(node, vextOp);
+
+ if ((index & 1) != 0)
+ {
+ // Odd byte index: keep the low byte of the element, and place the
+ // new value on the high byte.
+ node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp1, temp1));
+ node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, temp2, temp2, Const(8)));
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+ else
+ {
+ // Even byte index: keep the high byte of the element, and place the
+ // new value on the low byte.
+ node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp2, temp2));
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseAnd, temp1, temp1, Const(0xff00)));
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+
+ Operation vinsOp = new Operation(Instruction.VectorInsert16, dest, src1, temp1, Const(index >> 1));
+
+ node = nodes.AddAfter(node, vinsOp);
+
+ Delete(currentNode, operation);
+
+ return node;
+ }
+
+ // Lowers a Call operation to the Windows calling convention:
+ // - V128 values are passed by address; they are stored on stack space
+ // obtained from the stack allocator, and the address is passed instead.
+ // - A V128 return value is written by the callee to stack space whose
+ // address is passed on the first integer argument register.
+ // - The first arguments are copied to argument registers, the remaining
+ // ones are written to the stack with SpillArg.
+ // - The return value is copied from the return register after the call.
+ // Returns the last node inserted after the call, or the call node itself
+ // if nothing was inserted after it.
+ private static LLNode HandleCallWindowsAbi(StackAllocator stackAlloc, LLNode node, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ LinkedList<Node> nodes = node.List;
+
+ // Handle struct arguments.
+ // retArgs is 1 when the hidden return address argument is used, 0 otherwise.
+ int retArgs = 0;
+
+ int stackAllocOffset = 0;
+
+ int AllocateOnStack(int size)
+ {
+ // We assume that the stack allocator is initially empty (TotalSize = 0).
+ // Taking that into account, we can reuse the space allocated for other
+ // calls by keeping track of our own allocated size (stackAllocOffset).
+ // If the space allocated is not big enough, then we just expand it.
+ int offset = stackAllocOffset;
+
+ if (stackAllocOffset + size > stackAlloc.TotalSize)
+ {
+ stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize);
+ }
+
+ stackAllocOffset += size;
+
+ return offset;
+ }
+
+ Operand arg0Reg = null;
+
+ if (dest != null && dest.Type == OperandType.V128)
+ {
+ // Reserve space for the returned vector, and put its address on the
+ // first integer argument register.
+ int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes());
+
+ arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation allocOp = new Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
+
+ nodes.AddBefore(node, allocOp);
+
+ retArgs = 1;
+ }
+
+ // Number of arguments passed on registers (source 0 is not an argument).
+ int argsCount = operation.SourcesCount - 1;
+
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
+
+ if (argsCount > maxArgs)
+ {
+ argsCount = maxArgs;
+ }
+
+ // New source list: source 0, the return address register (if any), and
+ // the argument registers.
+ Operand[] sources = new Operand[1 + retArgs + argsCount];
+
+ sources[0] = operation.GetSource(0);
+
+ if (arg0Reg != null)
+ {
+ sources[1] = arg0Reg;
+ }
+
+ // Replace every V128 argument with the address of a stack copy of it.
+ for (int index = 1; index < operation.SourcesCount; index++)
+ {
+ Operand source = operation.GetSource(index);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand stackAddr = Local(OperandType.I64);
+
+ int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes());
+
+ nodes.AddBefore(node, new Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset)));
+
+ Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, storeOp), storeOp);
+
+ operation.SetSource(index, stackAddr);
+ }
+ }
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ Operand argReg;
+
+ // The register is selected by argument position, shifted by the hidden
+ // return address argument when present.
+ int argIndex = index + retArgs;
+
+ if (source.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type);
+ }
+
+ Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);
+
+ sources[1 + retArgs + index] = argReg;
+ }
+
+ // The remaining arguments (those that are not passed on registers)
+ // should be passed on the stack, we write them to the stack with "SpillArg".
+ for (int index = argsCount; index < operation.SourcesCount - 1; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ // Each argument position takes 8 bytes.
+ Operand offset = new Operand((index + retArgs) * 8);
+
+ Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
+ }
+
+ if (dest != null)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // Keep a copy of the return value address in a local before the call,
+ // and load the vector from that address after the call.
+ Operand retValueAddr = Local(OperandType.I64);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, retValueAddr, arg0Reg));
+
+ Operation loadOp = new Operation(Instruction.Load, dest, retValueAddr);
+
+ node = nodes.AddAfter(node, loadOp);
+
+ operation.Destination = null;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = new Operation(Instruction.Copy, dest, retReg);
+
+ node = nodes.AddAfter(node, copyOp);
+
+ operation.Destination = retReg;
+ }
+ }
+
+ operation.SetSources(sources);
+
+ return node;
+ }
+
+ // Lowers a Call operation to the System V calling convention:
+ // - Integer arguments are copied to integer argument registers, FP
+ // arguments to vector argument registers, while registers are available.
+ // - A V128 argument is passed as two 64-bits halves on two consecutive
+ // integer argument registers, when two of them are still available.
+ // - Arguments that don't fit on registers are written to the stack with
+ // SpillArg.
+ // - The return value is copied from the return register(s) after the call.
+ // The sources of the call are replaced with source 0 followed by every
+ // register that carries an argument.
+ // Returns the last node inserted after the call, or the call node itself
+ // if nothing was inserted after it.
+ private static LLNode HandleCallSystemVAbi(LLNode node, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ LinkedList<Node> nodes = node.List;
+
+ List<Operand> sources = new List<Operand>();
+
+ sources.Add(operation.GetSource(0));
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ // Number of integer and vector argument registers used so far.
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ // Needs two free integer registers.
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct, we pass each half on a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ // Both halves are arguments of the call, so they are recorded as its
+ // sources just like every other register argument. Without this the
+ // call would not reference the two registers at all.
+ sources.Add(argReg);
+ sources.Add(argReg2);
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ // Stack arguments are laid out back to back, each one taking the size
+ // of its type.
+ Operand offset = new Operand(stackOffset);
+
+ Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ if (dest != null)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // The vector is returned as two 64-bits halves on the low/high integer
+ // return registers, rebuild it after the call.
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = null;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = new Operation(Instruction.Copy, dest, retReg);
+
+ node = nodes.AddAfter(node, copyOp);
+
+ operation.Destination = retReg;
+ }
+ }
+
+ operation.SetSources(sources.ToArray());
+
+ return node;
+ }
+
+ // Lowers a LoadArgument operation to the Windows calling convention.
+ // The first time a given argument is loaded, a copy of its argument
+ // register into a fresh local is inserted at the very start of the entry
+ // block, and the local is cached on preservedArgs. The LoadArgument is
+ // then replaced by a copy (or, for V128, a load through the preserved
+ // address) from that local.
+ // Arguments that are not passed on registers are left untouched.
+ private static void HandleLoadArgumentWindowsAbi(
+ CompilerContext cctx,
+ LLNode node,
+ Operand[] preservedArgs,
+ Operation operation)
+ {
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ // When the function returns a V128, the first argument position is taken,
+ // so all argument positions are shifted by one.
+ int retArgs = cctx.FuncReturnType == OperandType.V128 ? 1 : 0;
+
+ int index = source.AsInt32() + retArgs;
+
+ if (index < CallingConvention.GetArgumentsOnRegsCount())
+ {
+ Operand dest = operation.Destination;
+
+ if (preservedArgs[index] == null)
+ {
+ Operand argReg, pArg;
+
+ if (dest.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type);
+
+ pArg = Local(dest.Type);
+ }
+ else if (dest.Type == OperandType.V128)
+ {
+ // V128 arguments arrive as a 64-bits value on an integer register,
+ // which is used as the address to load from below.
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64);
+
+ pArg = Local(OperandType.I64);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type);
+
+ pArg = Local(dest.Type);
+ }
+
+ Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+
+ Operation argCopyOp = new Operation(dest.Type == OperandType.V128
+ ? Instruction.Load
+ : Instruction.Copy, dest, preservedArgs[index]);
+
+ node.List.AddBefore(node, argCopyOp);
+
+ Delete(node, operation);
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ }
+ }
+
+ // Lowers a LoadArgument operation to the System V calling convention.
+ // The registers that carry the argument are found by counting how many
+ // integer and vector registers the preceding arguments take (a V128
+ // takes two integer registers). The first time a given argument is
+ // loaded, a copy of its register(s) into a fresh local is inserted at the
+ // very start of the entry block, and the local is cached on
+ // preservedArgs. The LoadArgument is then replaced by a copy from that
+ // local.
+ // Arguments that are not passed on registers are left untouched.
+ private static void HandleLoadArgumentSystemVAbi(
+ CompilerContext cctx,
+ LLNode node,
+ Operand[] preservedArgs,
+ Operation operation)
+ {
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int index = source.AsInt32();
+
+ // Registers taken by the arguments that come before this one.
+ int intCount = 0;
+ int vecCount = 0;
+
+ for (int cIndex = 0; cIndex < index; cIndex++)
+ {
+ OperandType argType = cctx.FuncArgTypes[cIndex];
+
+ if (argType.IsInteger())
+ {
+ intCount++;
+ }
+ else if (argType == OperandType.V128)
+ {
+ intCount += 2;
+ }
+ else
+ {
+ vecCount++;
+ }
+ }
+
+ Operand dest = operation.Destination;
+
+ // The register class is decided by the type of the argument being loaded
+ // (the destination type, which also selects the registers below), not by
+ // the type of the source, which is just the constant argument index.
+ OperandType destType = dest.Type;
+
+ bool passOnReg;
+
+ if (destType.IsInteger())
+ {
+ passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
+ }
+ else if (destType == OperandType.V128)
+ {
+ // Needs two free integer registers.
+ passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
+ }
+ else
+ {
+ passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
+ }
+
+ if (passOnReg)
+ {
+ if (preservedArgs[index] == null)
+ {
+ if (destType == OperandType.V128)
+ {
+ // V128 is a struct, we pass each half on a GPR if possible.
+ Operand pArg = Local(OperandType.V128);
+
+ Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+ Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+ Operation copyL = new Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+ Operation copyH = new Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+ // Both are added at the front, so the high half goes in first to end
+ // up after the low half.
+ cctx.Cfg.Entry.Operations.AddFirst(copyH);
+ cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+ preservedArgs[index] = pArg;
+ }
+ else
+ {
+ Operand pArg = Local(destType);
+
+ Operand argReg = destType.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), destType)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), destType);
+
+ Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+ }
+
+ Operation argCopyOp = new Operation(Instruction.Copy, dest, preservedArgs[index]);
+
+ node.List.AddBefore(node, argCopyOp);
+
+ Delete(node, operation);
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ }
+ }
+
+ // Lowers a Return operation with a value to the Windows calling
+ // convention. Integer and FP values are copied to the integer/vector
+ // return register. A V128 value is stored to the address received on the
+ // first integer argument register; a copy of that register into a local
+ // is inserted at the start of the entry block the first time it is needed
+ // (and cached on preservedArgs[0]).
+ // The sources of the Return operation are removed afterwards.
+ private static void HandleReturnWindowsAbi(
+ CompilerContext cctx,
+ LLNode node,
+ Operand[] preservedArgs,
+ Operation operation)
+ {
+ if (operation.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand value = operation.GetSource(0);
+
+ bool isInteger = value.Type.IsInteger();
+
+ if (!isInteger && value.Type == OperandType.V128)
+ {
+ if (preservedArgs[0] == null)
+ {
+ Operand savedAddr = Local(OperandType.I64);
+
+ Operand firstArgReg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ cctx.Cfg.Entry.Operations.AddFirst(new Operation(Instruction.Copy, savedAddr, firstArgReg));
+
+ preservedArgs[0] = savedAddr;
+ }
+
+ // Write the vector through the preserved return value address.
+ node.List.AddBefore(node, new Operation(Instruction.Store, null, preservedArgs[0], value));
+ }
+ else
+ {
+ Operand target;
+
+ if (isInteger)
+ {
+ target = Gpr(CallingConvention.GetIntReturnRegister(), value.Type);
+ }
+ else
+ {
+ target = Xmm(CallingConvention.GetVecReturnRegister(), value.Type);
+ }
+
+ node.List.AddBefore(node, new Operation(Instruction.Copy, target, value));
+ }
+
+ operation.SetSources(new Operand[0]);
+ }
+
+ // Lowers a Return operation with a value to the System V calling
+ // convention. A V128 value is split into two 64-bits halves that go on
+ // the low/high integer return registers; any other value is copied to the
+ // integer or vector return register. The copies are inserted before the
+ // Return operation, which itself is left unchanged.
+ private static void HandleReturnSystemVAbi(LLNode node, Operation operation)
+ {
+ if (operation.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand value = operation.GetSource(0);
+
+ if (value.Type != OperandType.V128)
+ {
+ Operand target;
+
+ if (value.Type.IsInteger())
+ {
+ target = Gpr(CallingConvention.GetIntReturnRegister(), value.Type);
+ }
+ else
+ {
+ target = Xmm(CallingConvention.GetVecReturnRegister(), value.Type);
+ }
+
+ node.List.AddBefore(node, new Operation(Instruction.Copy, target, value));
+
+ return;
+ }
+
+ Operand lowReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand highReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ node.List.AddBefore(node, new Operation(Instruction.VectorExtract, lowReg, value, Const(0)));
+ node.List.AddBefore(node, new Operation(Instruction.VectorExtract, highReg, value, Const(1)));
+ }
+
+ // Materializes a non-integer constant before the node: the integer form
+ // of the constant (see GetIntConst) is first copied to a local, and a
+ // vector is then created from that local with VectorCreateScalar.
+ // Returns the new local (of the same type as the source) holding the value.
+ private static Operand AddXmmCopy(LLNode node, Operand source)
+ {
+ Operand result = Local(source.Type);
+
+ Operand bits = AddCopy(node, GetIntConst(source));
+
+ node.List.AddBefore(node, new Operation(Instruction.VectorCreateScalar, result, bits));
+
+ return result;
+ }
+
+ // Inserts, before the node, a copy of the source into a fresh local of
+ // the same type, and returns that local.
+ private static Operand AddCopy(LLNode node, Operand source)
+ {
+ Operand copy = Local(source.Type);
+
+ node.List.AddBefore(node, new Operation(Instruction.Copy, copy, source));
+
+ return copy;
+ }
+
+ // Returns an integer constant built from the 32-bits (FP32) or 64-bits
+ // (FP64) integer view of the value. Operands of any other type are
+ // returned as they are.
+ private static Operand GetIntConst(Operand value)
+ {
+ switch (value.Type)
+ {
+ case OperandType.FP32:
+ return Const(value.AsInt32());
+
+ case OperandType.FP64:
+ return Const(value.AsInt64());
+
+ default:
+ return value;
+ }
+ }
+
+ // Tells whether the value of the operand does not fit on a signed 32-bits
+ // integer. I32 operands are read as 32-bits values, everything else as
+ // 64-bits values.
+ private static bool IsLongConst(Operand operand)
+ {
+ long widened;
+
+ if (operand.Type == OperandType.I32)
+ {
+ widened = operand.AsInt32();
+ }
+ else
+ {
+ widened = operand.AsInt64();
+ }
+
+ return !ConstFitsOnS32(widened);
+ }
+
+ // Tells whether the value survives a round trip through a 32-bits signed
+ // integer, that is, whether it can be represented as one.
+ private static bool ConstFitsOnS32(long value)
+ {
+ int truncated = (int)value;
+
+ return truncated == value;
+ }
+
+ // Removes the node from its list, after clearing the destination and
+ // every source of the operation.
+ private static void Delete(LLNode node, Operation operation)
+ {
+ operation.Destination = null;
+
+ int srcIndex = 0;
+
+ while (srcIndex < operation.SourcesCount)
+ {
+ operation.SetSource(srcIndex, null);
+
+ srcIndex++;
+ }
+
+ node.List.Remove(node);
+ }
+
+ // Creates a register operand of the integer register class for the given
+ // x86 register and operand type.
+ private static Operand Gpr(X86Register register, OperandType type) =>
+ Register((int)register, RegisterType.Integer, type);
+
+ // Creates a register operand of the vector register class for the given
+ // x86 register and operand type.
+ private static Operand Xmm(X86Register register, OperandType type) =>
+ Register((int)register, RegisterType.Vector, type);
+
+ // Tells whether the operation requires its destination and its first
+ // source to be the same operand. For some instructions the answer depends
+ // on whether VEX encoding is supported and on whether the destination is
+ // an integer; instructions not listed here are handled by
+ // IsVexSameOperandDestSrc1.
+ private static bool IsSameOperandDestSrc1(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseNot:
+ case Instruction.BitwiseOr:
+ case Instruction.ByteSwap:
+ case Instruction.Negate:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ return true;
+
+ case Instruction.Add:
+ case Instruction.Multiply:
+ case Instruction.Subtract:
+ // Always required without VEX, and with VEX only for integers.
+ if (HardwareCapabilities.SupportsVexEncoding)
+ {
+ return operation.Destination.Type.IsInteger();
+ }
+
+ return true;
+
+ case Instruction.Divide:
+ // Only required for non-integer divisions without VEX.
+ if (HardwareCapabilities.SupportsVexEncoding)
+ {
+ return false;
+ }
+
+ return !operation.Destination.Type.IsInteger();
+
+ case Instruction.VectorInsert:
+ case Instruction.VectorInsert16:
+ case Instruction.VectorInsert8:
+ return !HardwareCapabilities.SupportsVexEncoding;
+
+ default:
+ return IsVexSameOperandDestSrc1(operation);
+ }
+ }
+
+ // Tells whether an intrinsic operation requires its destination and its
+ // first source to be the same operand: that is the case when VEX encoding
+ // is not supported, the operation has two or more sources, and the
+ // destination is a V128. Always false for non-intrinsic operations.
+ private static bool IsVexSameOperandDestSrc1(Operation operation)
+ {
+ if (!IsIntrinsic(operation.Instruction))
+ {
+ return false;
+ }
+
+ bool hasTwoOrMoreSources = operation.SourcesCount >= 2;
+
+ Operand dest = operation.Destination;
+
+ bool isVecDest = dest != null && dest.Type == OperandType.V128;
+
+ return hasTwoOrMoreSources && isVecDest && !HardwareCapabilities.SupportsVexEncoding;
+ }
+
+ // Tells whether the instruction is one of those for which an integer
+ // constant may stay on the first source.
+ private static bool HasConstSrc1(Instruction inst)
+ {
+ return inst == Instruction.Copy ||
+ inst == Instruction.LoadArgument ||
+ inst == Instruction.Spill ||
+ inst == Instruction.SpillArg;
+ }
+
+ // Tells whether the instruction is one of those for which an integer
+ // constant may stay on the second source.
+ private static bool HasConstSrc2(Instruction inst)
+ {
+ bool isAlu =
+ inst == Instruction.Add ||
+ inst == Instruction.BitwiseAnd ||
+ inst == Instruction.BitwiseExclusiveOr ||
+ inst == Instruction.BitwiseOr ||
+ inst == Instruction.Multiply ||
+ inst == Instruction.Subtract;
+
+ bool isCompare =
+ inst == Instruction.CompareEqual ||
+ inst == Instruction.CompareGreater ||
+ inst == Instruction.CompareGreaterOrEqual ||
+ inst == Instruction.CompareGreaterOrEqualUI ||
+ inst == Instruction.CompareGreaterUI ||
+ inst == Instruction.CompareLess ||
+ inst == Instruction.CompareLessOrEqual ||
+ inst == Instruction.CompareLessOrEqualUI ||
+ inst == Instruction.CompareLessUI ||
+ inst == Instruction.CompareNotEqual;
+
+ bool isShift =
+ inst == Instruction.RotateRight ||
+ inst == Instruction.ShiftLeft ||
+ inst == Instruction.ShiftRightSI ||
+ inst == Instruction.ShiftRightUI;
+
+ bool isVectorExtract =
+ inst == Instruction.VectorExtract ||
+ inst == Instruction.VectorExtract16 ||
+ inst == Instruction.VectorExtract8;
+
+ return isAlu || isCompare || isShift || isVectorExtract;
+ }
+
+ // Tells whether the two sources of the instruction can be swapped, which
+ // is used to move a constant from the first to the second source.
+ private static bool IsCommutative(Instruction inst)
+ {
+ return inst == Instruction.Add ||
+ inst == Instruction.BitwiseAnd ||
+ inst == Instruction.BitwiseExclusiveOr ||
+ inst == Instruction.BitwiseOr ||
+ inst == Instruction.CompareEqual ||
+ inst == Instruction.CompareNotEqual ||
+ inst == Instruction.Multiply;
+ }
+
+ // Intrinsic operations are the ones that use the Extended instruction.
+ private static bool IsIntrinsic(Instruction inst) => inst == Instruction.Extended;
+ }
+} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Condition.cs b/ARMeilleure/CodeGen/X86/X86Condition.cs
new file mode 100644
index 00000000..a17c6d6c
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Condition.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Condition // NOTE(review): values look like the 4-bit x86 condition-code (cc) encodings; the flag notes below come from the Intel SDM, not from this file - confirm.
+ {
+     Overflow       = 0x0, // OF = 1
+     NotOverflow    = 0x1, // OF = 0
+     Below          = 0x2, // CF = 1 (unsigned <)
+     AboveOrEqual   = 0x3, // CF = 0 (unsigned >=)
+     Equal          = 0x4, // ZF = 1
+     NotEqual       = 0x5, // ZF = 0
+     BelowOrEqual   = 0x6, // CF = 1 or ZF = 1 (unsigned <=)
+     Above          = 0x7, // CF = 0 and ZF = 0 (unsigned >)
+     Sign           = 0x8, // SF = 1
+     NotSign        = 0x9, // SF = 0
+     ParityEven     = 0xA, // PF = 1
+     ParityOdd      = 0xB, // PF = 0
+     Less           = 0xC, // SF != OF (signed <)
+     GreaterOrEqual = 0xD, // SF == OF (signed >=)
+     LessOrEqual    = 0xE, // ZF = 1 or SF != OF (signed <=)
+     Greater        = 0xF  // ZF = 0 and SF == OF (signed >)
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
new file mode 100644
index 00000000..10ba891a
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -0,0 +1,190 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Instruction // Identifiers for x86 instructions; values are implicit (0, 1, 2, ...) in declaration order, so reordering or inserting entries changes every later value.
+ {
+ Add,
+ Addpd,
+ Addps,
+ Addsd,
+ Addss,
+ And,
+ Andnpd,
+ Andnps,
+ Bsr,
+ Bswap,
+ Call,
+ Cmovcc,
+ Cmp,
+ Cmppd,
+ Cmpps,
+ Cmpsd,
+ Cmpss,
+ Cmpxchg16b,
+ Comisd,
+ Comiss,
+ Cpuid,
+ Cvtdq2pd,
+ Cvtdq2ps,
+ Cvtpd2dq,
+ Cvtpd2ps,
+ Cvtps2dq,
+ Cvtps2pd,
+ Cvtsd2si,
+ Cvtsd2ss,
+ Cvtsi2sd,
+ Cvtsi2ss,
+ Cvtss2sd,
+ Div,
+ Divpd,
+ Divps,
+ Divsd,
+ Divss,
+ Haddpd,
+ Haddps,
+ Idiv,
+ Imul,
+ Imul128,
+ Insertps,
+ Lea,
+ Maxpd,
+ Maxps,
+ Maxsd,
+ Maxss,
+ Minpd,
+ Minps,
+ Minsd,
+ Minss,
+ Mov,
+ Mov16,
+ Mov8,
+ Movd,
+ Movdqu,
+ Movhlps,
+ Movlhps,
+ Movq,
+ Movsd,
+ Movss,
+ Movsx16,
+ Movsx32,
+ Movsx8,
+ Movzx16,
+ Movzx8,
+ Mul128,
+ Mulpd,
+ Mulps,
+ Mulsd,
+ Mulss,
+ Neg,
+ Not,
+ Or,
+ Paddb,
+ Paddd,
+ Paddq,
+ Paddw,
+ Pand,
+ Pandn,
+ Pavgb,
+ Pavgw,
+ Pblendvb,
+ Pcmpeqb,
+ Pcmpeqd,
+ Pcmpeqq,
+ Pcmpeqw,
+ Pcmpgtb,
+ Pcmpgtd,
+ Pcmpgtq,
+ Pcmpgtw,
+ Pextrb,
+ Pextrd,
+ Pextrq,
+ Pextrw,
+ Pinsrb,
+ Pinsrd,
+ Pinsrq,
+ Pinsrw,
+ Pmaxsb,
+ Pmaxsd,
+ Pmaxsw,
+ Pmaxub,
+ Pmaxud,
+ Pmaxuw,
+ Pminsb,
+ Pminsd,
+ Pminsw,
+ Pminub,
+ Pminud,
+ Pminuw,
+ Pmovsxbw,
+ Pmovsxdq,
+ Pmovsxwd,
+ Pmovzxbw,
+ Pmovzxdq,
+ Pmovzxwd,
+ Pmulld,
+ Pmullw,
+ Pop,
+ Popcnt,
+ Por,
+ Pshufb,
+ Pshufd,
+ Pslld,
+ Pslldq,
+ Psllq,
+ Psllw,
+ Psrad,
+ Psraw,
+ Psrld,
+ Psrlq, // NOTE(review): alphabetically this belongs after Psrldq (compare Pslldq/Psllq above); left in place because moving it would change the implicit values.
+ Psrldq,
+ Psrlw,
+ Psubb,
+ Psubd,
+ Psubq,
+ Psubw,
+ Punpckhbw,
+ Punpckhdq,
+ Punpckhqdq,
+ Punpckhwd,
+ Punpcklbw,
+ Punpckldq,
+ Punpcklqdq,
+ Punpcklwd,
+ Push,
+ Pxor,
+ Rcpps,
+ Rcpss,
+ Ror,
+ Roundpd,
+ Roundps,
+ Roundsd,
+ Roundss,
+ Rsqrtps,
+ Rsqrtss,
+ Sar,
+ Setcc,
+ Shl,
+ Shr,
+ Shufpd,
+ Shufps,
+ Sqrtpd,
+ Sqrtps,
+ Sqrtsd,
+ Sqrtss,
+ Sub,
+ Subpd,
+ Subps,
+ Subsd,
+ Subss,
+ Test,
+ Unpckhpd,
+ Unpckhps,
+ Unpcklpd,
+ Unpcklps,
+ Vpblendvb,
+ Xor,
+ Xorpd,
+ Xorps,
+
+ Count // Not an instruction: as the last implicitly-valued member, it equals the number of entries declared above it.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Register.cs b/ARMeilleure/CodeGen/X86/X86Register.cs
new file mode 100644
index 00000000..01f63e31
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Register.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Register
+ {
+     Invalid = -1, // Matches none of the register values below.
+     // General purpose registers, numbered 0-15. NOTE(review): the numbering looks like the hardware register encoding; confirm against the assembler.
+     Rax = 0,
+     Rcx = 1,
+     Rdx = 2,
+     Rbx = 3,
+     Rsp = 4,
+     Rbp = 5,
+     Rsi = 6,
+     Rdi = 7,
+     R8  = 8,
+     R9  = 9,
+     R10 = 10,
+     R11 = 11,
+     R12 = 12,
+     R13 = 13,
+     R14 = 14,
+     R15 = 15,
+     // XMM registers reuse the numbers 0-15, so each XmmN is numerically equal to the general purpose register with the same value (e.g. Xmm0 == Rax).
+     Xmm0  = 0,
+     Xmm1  = 1,
+     Xmm2  = 2,
+     Xmm3  = 3,
+     Xmm4  = 4,
+     Xmm5  = 5,
+     Xmm6  = 6,
+     Xmm7  = 7,
+     Xmm8  = 8,
+     Xmm9  = 9,
+     Xmm10 = 10,
+     Xmm11 = 11,
+     Xmm12 = 12,
+     Xmm13 = 13,
+     Xmm14 = 14,
+     Xmm15 = 15
+ }
+}
\ No newline at end of file