-rw-r--r--  ARMeilleure/CodeGen/Optimizations/Optimizer.cs         |   4
-rw-r--r--  ARMeilleure/CodeGen/X86/Assembler.cs                   |  14
-rw-r--r--  ARMeilleure/CodeGen/X86/CodeGenerator.cs               |  32
-rw-r--r--  ARMeilleure/CodeGen/X86/PreAllocator.cs                | 188
-rw-r--r--  ARMeilleure/CodeGen/X86/X86Instruction.cs              |   2
-rw-r--r--  ARMeilleure/Decoders/Block.cs                          |   2
-rw-r--r--  ARMeilleure/Decoders/Decoder.cs                        |  16
-rw-r--r--  ARMeilleure/Decoders/Optimizations/TailCallRemover.cs  |  75
-rw-r--r--  ARMeilleure/Instructions/DelegateTypes.cs              |   2
-rw-r--r--  ARMeilleure/Instructions/InstEmitAluHelper.cs          |  22
-rw-r--r--  ARMeilleure/Instructions/InstEmitException.cs          |   5
-rw-r--r--  ARMeilleure/Instructions/InstEmitException32.cs        |   3
-rw-r--r--  ARMeilleure/Instructions/InstEmitFlow.cs               |  18
-rw-r--r--  ARMeilleure/Instructions/InstEmitFlow32.cs             |  14
-rw-r--r--  ARMeilleure/Instructions/InstEmitFlowHelper.cs         | 200
-rw-r--r--  ARMeilleure/Instructions/InstEmitHelper.cs             |  32
-rw-r--r--  ARMeilleure/Instructions/InstEmitMemoryHelper.cs       |   2
-rw-r--r--  ARMeilleure/Instructions/NativeInterface.cs            |  43
-rw-r--r--  ARMeilleure/IntermediateRepresentation/Instruction.cs  |   3
-rw-r--r--  ARMeilleure/Memory/MemoryManagement.cs                 |  38
-rw-r--r--  ARMeilleure/Memory/MemoryManagementUnix.cs             |  23
-rw-r--r--  ARMeilleure/Memory/MemoryManagementWindows.cs          |  23
-rw-r--r--  ARMeilleure/Memory/MemoryManagerPal.cs                 |   2
-rw-r--r--  ARMeilleure/Memory/ReservedRegion.cs                   |  53
-rw-r--r--  ARMeilleure/State/ExecutionContext.cs                  |  10
-rw-r--r--  ARMeilleure/State/NativeContext.cs                     |  10
-rw-r--r--  ARMeilleure/Translation/ArmEmitterContext.cs           |  15
-rw-r--r--  ARMeilleure/Translation/DirectCallStubs.cs             | 131
-rw-r--r--  ARMeilleure/Translation/EmitterContext.cs              |  17
-rw-r--r--  ARMeilleure/Translation/JitCache.cs                    |  11
-rw-r--r--  ARMeilleure/Translation/JumpTable.cs                   | 149
-rw-r--r--  ARMeilleure/Translation/TranslatedFunction.cs          |   9
-rw-r--r--  ARMeilleure/Translation/Translator.cs                  |  48
-rw-r--r--  Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs       |   2
-rw-r--r--  Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs            |   4
35 files changed, 1090 insertions(+), 132 deletions(-)
diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
index d3ffd185..8b0c75fd 100644
--- a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
+++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -136,7 +136,9 @@ namespace ARMeilleure.CodeGen.Optimizations
private static bool HasSideEffects(Node node)
{
- return (node is Operation operation) && operation.Instruction == Instruction.Call;
+ return (node is Operation operation) && (operation.Instruction == Instruction.Call
+ || operation.Instruction == Instruction.Tailcall
+ || operation.Instruction == Instruction.CompareAndSwap);
}
private static bool IsPropagableCopy(Operation operation)
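Note: dead-code elimination keeps an unused node only when it has side effects, so Tailcall and CompareAndSwap must be listed here or they would be deleted whenever their result is ignored (a CAS performed purely for its store, for instance). A minimal sketch of how such a pass consults this predicate; names like UsesCount and Remove are illustrative, not the actual Optimizer loop:

    // Sketch: an unused node may only be removed when it has no side effects.
    static void RemoveDeadNodes(IEnumerable<Node> nodes)
    {
        foreach (Node node in nodes.ToList()) // Snapshot, since we mutate the list.
        {
            bool unused = node.Destination == null || node.UsesCount == 0;

            if (unused && !HasSideEffects(node)) // Call, Tailcall, CompareAndSwap survive.
            {
                Remove(node); // Hypothetical helper; detaches the node from the block.
            }
        }
    }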
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
index 70130d90..5088e6f0 100644
--- a/ARMeilleure/CodeGen/X86/Assembler.cs
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -90,6 +90,7 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex));
Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cmpxchg, new InstructionInfo(0x00000fb1, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
@@ -117,6 +118,7 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None));
Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None));
Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None));
Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex));
@@ -328,6 +330,13 @@ namespace ARMeilleure.CodeGen.X86
WriteByte(0x99);
}
+ public void Cmpxchg(MemoryOperand memOp, Operand src)
+ {
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
+ }
+
public void Cmpxchg16b(MemoryOperand memOp)
{
WriteByte(LockPrefix);
@@ -480,6 +489,11 @@ namespace ARMeilleure.CodeGen.X86
}
}
+ public void Jmp(Operand dest)
+ {
+ WriteInstruction(dest, null, OperandType.None, X86Instruction.Jmp);
+ }
+
public void Lea(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Lea);
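Note: judging by the entries above, the low bytes of each packed constant hold the opcode bytes and the high byte, when non-zero, holds the ModRM reg-field extension: 0x00000fb1 is 0F B1 (CMPXCHG r/m, r), 0x040000ff is FF /4 (JMP r/m64), and the pre-existing 0x01000fc7 is 0F C7 /1 (CMPXCHG16B m128). A small sketch of unpacking such a constant (the field handling is assumed, not the assembler's actual code):

    const uint packed = 0x040000ff;      // Jmp: FF /4.
    uint opCode = packed & 0xffffff;     // 0x0000ff, the primary opcode bytes.
    uint regExt = (packed >> 24) & 0x7;  // 4, the value placed in ModRM.reg.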
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
index 32ca6a78..1d0a4c12 100644
--- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -34,7 +34,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Instruction.ByteSwap, GenerateByteSwap);
Add(Instruction.Call, GenerateCall);
Add(Instruction.Clobber, GenerateClobber);
- Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128);
+ Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
Add(Instruction.CompareEqual, GenerateCompareEqual);
Add(Instruction.CompareGreater, GenerateCompareGreater);
Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual);
@@ -76,6 +76,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Instruction.Store16, GenerateStore16);
Add(Instruction.Store8, GenerateStore8);
Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.Tailcall, GenerateTailcall);
Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
Add(Instruction.VectorExtract, GenerateVectorExtract);
Add(Instruction.VectorExtract16, GenerateVectorExtract16);
@@ -543,13 +544,27 @@ namespace ARMeilleure.CodeGen.X86
// register allocator, we don't need to produce any code.
}
- private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation)
+ private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
{
- Operand source = operation.GetSource(0);
+ Operand src1 = operation.GetSource(0);
+
+ if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+ {
+ MemoryOperand memOp = new MemoryOperand(OperandType.I64, src1);
+
+ context.Assembler.Cmpxchg16b(memOp);
+ }
+ else
+ {
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
- MemoryOperand memOp = new MemoryOperand(OperandType.I64, source);
+ EnsureSameType(src2, src3);
- context.Assembler.Cmpxchg16b(memOp);
+ MemoryOperand memOp = new MemoryOperand(src3.Type, src1);
+
+ context.Assembler.Cmpxchg(memOp, src3);
+ }
}
private static void GenerateCompareEqual(CodeGenContext context, Operation operation)
@@ -1083,6 +1098,13 @@ namespace ARMeilleure.CodeGen.X86
}
}
+ private static void GenerateTailcall(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Jmp(operation.GetSource(0));
+ }
+
private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
{
Operand dest = operation.Destination;
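Note: GenerateTailcall writes the usual epilogue and then emits a jmp rather than a call, so the callee's ret goes straight back to our caller. Because the epilogue restores callee-saved registers, the jump target has to survive it; the pre-allocator (next file) therefore pins the target in the integer return register, which the epilogue never restores. A comment-form illustration of the hazard this avoids (registers chosen for illustration only):

    // If the target address lived in a callee-saved register such as RBX:
    //   mov rbx, target   ; target placed in RBX by register allocation
    //   pop rbx           ; epilogue restores the caller's RBX, target is lost
    //   jmp rbx           ; jumps to a stale address
    // Holding the target in RAX (the integer return register) avoids this.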
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
index 75844b09..e20fca9d 100644
--- a/ARMeilleure/CodeGen/X86/PreAllocator.cs
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -1,6 +1,7 @@
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
+using System;
using System.Collections.Generic;
using System.Diagnostics;
@@ -101,6 +102,17 @@ namespace ARMeilleure.CodeGen.X86
}
break;
+ case Instruction.Tailcall:
+ if (callConv == CallConvName.Windows)
+ {
+ HandleTailcallWindowsAbi(block.Operations, stackAlloc, node, operation);
+ }
+ else
+ {
+ HandleTailcallSystemVAbi(block.Operations, stackAlloc, node, operation);
+ }
+ break;
+
case Instruction.VectorInsert8:
if (!HardwareCapabilities.SupportsSse41)
{
@@ -199,32 +211,55 @@ namespace ARMeilleure.CodeGen.X86
switch (operation.Instruction)
{
- case Instruction.CompareAndSwap128:
+ case Instruction.CompareAndSwap:
{
- // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
- // - The expected value should be in RDX:RAX.
- // - The new value to be written should be in RCX:RBX.
- // - The value at the memory location is loaded to RDX:RAX.
- void SplitOperand(Operand source, Operand lr, Operand hr)
+ OperandType type = operation.GetSource(1).Type;
+
+ if (type == OperandType.V128)
{
- nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0)));
- nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
+ // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
+ // - The expected value should be in RDX:RAX.
+ // - The new value to be written should be in RCX:RBX.
+ // - The value at the memory location is loaded to RDX:RAX.
+ void SplitOperand(Operand source, Operand lr, Operand hr)
+ {
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0)));
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
+ }
+
+ Operand rax = Gpr(X86Register.Rax, OperandType.I64);
+ Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+ Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+
+ SplitOperand(operation.GetSource(1), rax, rdx);
+ SplitOperand(operation.GetSource(2), rbx, rcx);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax));
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+
+ operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
}
+ else
+ {
+ // Handle the many restrictions of the compare and exchange (32/64) instruction:
+ // - The expected value should be in (E/R)AX.
+ // - The value at the memory location is loaded to (E/R)AX.
- Operand rax = Gpr(X86Register.Rax, OperandType.I64);
- Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
- Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
- Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+ Operand expected = operation.GetSource(1);
- SplitOperand(operation.GetSource(1), rax, rdx);
- SplitOperand(operation.GetSource(2), rbx, rcx);
+ Operand rax = Gpr(X86Register.Rax, expected.Type);
- node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax));
- node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
+ nodes.AddBefore(node, new Operation(Instruction.Copy, rax, expected));
- operation.SetDestinations(new Operand[] { rdx, rax });
+ operation.SetSources(new Operand[] { operation.GetSource(0), rax, operation.GetSource(2) });
- operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
+
+ operation.Destination = rax;
+ }
break;
}
@@ -829,6 +864,123 @@ namespace ARMeilleure.CodeGen.X86
return node;
}
+ private static void HandleTailcallSystemVAbi(IntrusiveList<Node> nodes, StackAllocator stackAlloc, Node node, Operation operation)
+ {
+ List<Operand> sources = new List<Operand>();
+
+ sources.Add(operation.GetSource(0));
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(1 + index);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+                // V128 is a struct, so we pass each half in a GPR when possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+ HandleConstantCopy(nodes, nodes.AddBefore(node, copyOp), copyOp);
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+ }
+
+        // The target address must be in the return register, since we
+        // don't return anything and it is guaranteed not to be a
+        // callee-saved register (which would be trashed by the epilogue).
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+
+ Operation addrCopyOp = new Operation(Instruction.Copy, retReg, operation.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = retReg;
+
+ operation.SetSources(sources.ToArray());
+ }
+
+ private static void HandleTailcallWindowsAbi(IntrusiveList<Node> nodes, StackAllocator stackAlloc, Node node, Operation operation)
+ {
+ int argsCount = operation.SourcesCount - 1;
+
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount();
+
+ if (argsCount > maxArgs)
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+
+ Operand[] sources = new Operand[1 + argsCount];
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(1 + index);
+
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(index), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(index), source.Type);
+
+ Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+ HandleConstantCopy(nodes, nodes.AddBefore(node, copyOp), copyOp);
+
+ sources[1 + index] = argReg;
+ }
+
+        // The target address must be in the return register, since we
+        // don't return anything and it is guaranteed not to be a
+        // callee-saved register (which would be trashed by the epilogue).
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+
+ Operation addrCopyOp = new Operation(Instruction.Copy, retReg, operation.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = retReg;
+
+ operation.SetSources(sources);
+ }
+
private static void HandleLoadArgumentWindowsAbi(
CompilerContext cctx,
IntrusiveList<Node> nodes,
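Note: the 32/64-bit CompareAndSwap lowering above mirrors the hardware contract of LOCK CMPXCHG: the expected value is preloaded into (E/R)AX, and the value found in memory comes back in (E/R)AX whether or not the exchange happened. Interlocked.CompareExchange exposes the same contract, which makes it a convenient reference model (a sketch, not the emitter's code):

    using System.Threading;

    static class CasModel
    {
        // Returns the value previously at 'location', exactly as CMPXCHG leaves it in RAX.
        public static long CompareAndSwap(ref long location, long expected, long desired)
        {
            return Interlocked.CompareExchange(ref location, desired, expected);
        }
        // Success is detected by comparing the returned value against 'expected'.
    }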
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
index 813730f2..a6dbf1a5 100644
--- a/ARMeilleure/CodeGen/X86/X86Instruction.cs
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -23,6 +23,7 @@ namespace ARMeilleure.CodeGen.X86
Cmpps,
Cmpsd,
Cmpss,
+ Cmpxchg,
Cmpxchg16b,
Comisd,
Comiss,
@@ -50,6 +51,7 @@ namespace ARMeilleure.CodeGen.X86
Imul,
Imul128,
Insertps,
+ Jmp,
Lea,
Maxpd,
Maxps,
diff --git a/ARMeilleure/Decoders/Block.cs b/ARMeilleure/Decoders/Block.cs
index 3d13c2d5..d38b5a8e 100644
--- a/ARMeilleure/Decoders/Block.cs
+++ b/ARMeilleure/Decoders/Block.cs
@@ -11,6 +11,8 @@ namespace ARMeilleure.Decoders
public Block Next { get; set; }
public Block Branch { get; set; }
+ public bool TailCall { get; set; }
+
public List<OpCode> OpCodes { get; private set; }
public Block()
diff --git a/ARMeilleure/Decoders/Decoder.cs b/ARMeilleure/Decoders/Decoder.cs
index 7cbb62e6..9675dc8d 100644
--- a/ARMeilleure/Decoders/Decoder.cs
+++ b/ARMeilleure/Decoders/Decoder.cs
@@ -1,3 +1,4 @@
+using ARMeilleure.Decoders.Optimizations;
using ARMeilleure.Instructions;
using ARMeilleure.Memory;
using ARMeilleure.State;
@@ -15,6 +16,9 @@ namespace ARMeilleure.Decoders
// take too long to compile and use too much memory.
private const int MaxInstsPerFunction = 5000;
+ // For lower code quality translation, we set a lower limit since we're blocking execution.
+ private const int MaxInstsPerFunctionLowCq = 500;
+
private delegate object MakeOp(InstDescriptor inst, ulong address, int opCode);
private static ConcurrentDictionary<Type, MakeOp> _opActivators;
@@ -33,7 +37,7 @@ namespace ARMeilleure.Decoders
return new Block[] { block };
}
- public static Block[] DecodeFunction(MemoryManager memory, ulong address, ExecutionMode mode)
+ public static Block[] DecodeFunction(MemoryManager memory, ulong address, ExecutionMode mode, bool highCq)
{
List<Block> blocks = new List<Block>();
@@ -43,11 +47,13 @@ namespace ARMeilleure.Decoders
int opsCount = 0;
+ int instructionLimit = highCq ? MaxInstsPerFunction : MaxInstsPerFunctionLowCq;
+
Block GetBlock(ulong blkAddress)
{
if (!visited.TryGetValue(blkAddress, out Block block))
{
- if (opsCount > MaxInstsPerFunction || !memory.IsMapped((long)blkAddress))
+ if (opsCount > instructionLimit || !memory.IsMapped((long)blkAddress))
{
return null;
}
@@ -121,7 +127,7 @@ namespace ARMeilleure.Decoders
currBlock.Branch = GetBlock((ulong)op.Immediate);
}
- if (!IsUnconditionalBranch(lastOp) /*|| isCall*/)
+ if (!IsUnconditionalBranch(lastOp) || isCall)
{
currBlock.Next = GetBlock(currBlock.EndAddress);
}
@@ -140,10 +146,12 @@ namespace ARMeilleure.Decoders
}
}
+ TailCallRemover.RunPass(address, blocks);
+
return blocks.ToArray();
}
- private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
+ public static bool BinarySearch(List<Block> blocks, ulong address, out int index)
{
index = 0;
diff --git a/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs b/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs
new file mode 100644
index 00000000..2d6439ba
--- /dev/null
+++ b/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs
@@ -0,0 +1,75 @@
+using ARMeilleure.Decoders;
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders.Optimizations
+{
+ static class TailCallRemover
+ {
+ public static void RunPass(ulong entryAddress, List<Block> blocks)
+ {
+ // Detect tail calls:
+ // - Assume this function spans the space covered by contiguous code blocks surrounding the entry address.
+            //  - An unconditional jump to an area outside this contiguous region is treated as a tail call.
+ // - Include a small allowance for jumps outside the contiguous range.
+
+ if (!Decoder.BinarySearch(blocks, entryAddress, out int entryBlockId))
+ {
+ throw new InvalidOperationException("Function entry point is not contained in a block.");
+ }
+
+ const ulong allowance = 4;
+ Block entryBlock = blocks[entryBlockId];
+ int startBlockIndex = entryBlockId;
+ Block startBlock = entryBlock;
+ int endBlockIndex = entryBlockId;
+ Block endBlock = entryBlock;
+
+ for (int i = entryBlockId + 1; i < blocks.Count; i++) // Search forwards.
+ {
+ Block block = blocks[i];
+ if (endBlock.EndAddress < block.Address - allowance)
+ {
+ break; // End of contiguous function.
+ }
+
+ endBlock = block;
+ endBlockIndex = i;
+ }
+
+ for (int i = entryBlockId - 1; i >= 0; i--) // Search backwards.
+ {
+ Block block = blocks[i];
+ if (startBlock.Address > block.EndAddress + allowance)
+ {
+ break; // End of contiguous function.
+ }
+
+ startBlock = block;
+ startBlockIndex = i;
+ }
+
+ if (startBlockIndex == 0 && endBlockIndex == blocks.Count - 1)
+ {
+ return; // Nothing to do here.
+ }
+
+ // Replace all branches to blocks outside the range with null, and force a tail call.
+
+ for (int i = startBlockIndex; i <= endBlockIndex; i++)
+ {
+ Block block = blocks[i];
+ if (block.Branch != null && (block.Branch.Address > endBlock.EndAddress || block.Branch.EndAddress < startBlock.Address))
+ {
+ block.Branch = null;
+ block.TailCall = true;
+ }
+ }
+
+ // Finally, delete all blocks outside the contiguous range.
+
+ blocks.RemoveRange(endBlockIndex + 1, (blocks.Count - endBlockIndex) - 1);
+ blocks.RemoveRange(0, startBlockIndex);
+ }
+ }
+}
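Note: a worked example with hypothetical addresses. Decoding from entry 0x1000 yields blocks covering [0x1000, 0x1040) and [0x1044, 0x1080), plus a stray block at 0x9000 reached by an unconditional branch. The 4-byte allowance bridges the gap at 0x1040, so the first two blocks form the function; the branch to 0x9000 falls outside the range, so that edge becomes Branch = null with TailCall = true, and the 0x9000 block is dropped from the list.

    // Forward-search check with the numbers above (allowance = 4):
    //   endBlock.EndAddress (0x1040) < 0x1044 - 4  => false, gap bridged, block joins.
    //   endBlock.EndAddress (0x1080) < 0x9000 - 4  => true, contiguous region ends.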
diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs
index b65149cb..41614f88 100644
--- a/ARMeilleure/Instructions/DelegateTypes.cs
+++ b/ARMeilleure/Instructions/DelegateTypes.cs
@@ -3,6 +3,8 @@ using System;
namespace ARMeilleure.Instructions
{
+ delegate bool _Bool();
+
delegate double _F64_F64(double a1);
delegate double _F64_F64_Bool(double a1, bool a2);
delegate double _F64_F64_F64(double a1, double a2);
diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs
index 916a1da5..12fa1bf1 100644
--- a/ARMeilleure/Instructions/InstEmitAluHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs
@@ -116,12 +116,14 @@ namespace ARMeilleure.Instructions
{
Debug.Assert(value.Type == OperandType.I32);
- context.StoreToContext();
-
if (IsThumb(context.CurrOp))
{
- // Make this count as a call, the translator will ignore the low bit for the address.
- context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1))));
+ context.StoreToContext();
+ bool isReturn = IsA32Return(context);
+
+ Operand addr = context.BitwiseOr(value, Const(1));
+
+ InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn);
}
else
{
@@ -138,18 +140,8 @@ namespace ARMeilleure.Instructions
if (setFlags)
{
// TODO: Load SPSR etc.
- Operand isThumb = GetFlag(PState.TFlag);
-
- Operand lblThumb = Label();
-
- context.BranchIfTrue(lblThumb, isThumb);
-
- // Make this count as a call, the translator will ignore the low bit for the address.
- context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(value, Const(~3)), Const(1))));
-
- context.MarkLabel(lblThumb);
- context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1))));
+ EmitBxWritePc(context, value);
}
else
{
diff --git a/ARMeilleure/Instructions/InstEmitException.cs b/ARMeilleure/Instructions/InstEmitException.cs
index 6f7b6fd5..f0bde242 100644
--- a/ARMeilleure/Instructions/InstEmitException.cs
+++ b/ARMeilleure/Instructions/InstEmitException.cs
@@ -2,6 +2,7 @@ using ARMeilleure.Decoders;
using ARMeilleure.Translation;
using System;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Instructions
@@ -30,7 +31,7 @@ namespace ARMeilleure.Instructions
if (context.CurrBlock.Next == null)
{
- context.Return(Const(op.Address + 4));
+ EmitTailContinue(context, Const(op.Address + 4));
}
}
@@ -48,7 +49,7 @@ namespace ARMeilleure.Instructions
if (context.CurrBlock.Next == null)
{
- context.Return(Const(op.Address + 4));
+ EmitTailContinue(context, Const(op.Address + 4));
}
}
}
diff --git a/ARMeilleure/Instructions/InstEmitException32.cs b/ARMeilleure/Instructions/InstEmitException32.cs
index a73f0dec..8ffad1d1 100644
--- a/ARMeilleure/Instructions/InstEmitException32.cs
+++ b/ARMeilleure/Instructions/InstEmitException32.cs
@@ -1,6 +1,7 @@
using ARMeilleure.Decoders;
using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Instructions
@@ -29,7 +30,7 @@ namespace ARMeilleure.Instructions
if (context.CurrBlock.Next == null)
{
- context.Return(Const(op.Address + 4));
+ EmitTailContinue(context, Const(op.Address + 4));
}
}
}
diff --git a/ARMeilleure/Instructions/InstEmitFlow.cs b/ARMeilleure/Instructions/InstEmitFlow.cs
index 93d36e1b..bac9ec58 100644
--- a/ARMeilleure/Instructions/InstEmitFlow.cs
+++ b/ARMeilleure/Instructions/InstEmitFlow.cs
@@ -21,7 +21,7 @@ namespace ARMeilleure.Instructions
}
else
{
- context.Return(Const(op.Immediate));
+ EmitTailContinue(context, Const(op.Immediate), context.CurrBlock.TailCall);
}
}
@@ -56,7 +56,7 @@ namespace ARMeilleure.Instructions
{
OpCodeBReg op = (OpCodeBReg)context.CurrOp;
- EmitVirtualJump(context, GetIntOrZR(context, op.Rn));
+ EmitVirtualJump(context, GetIntOrZR(context, op.Rn), op.Rn == RegisterAlias.Lr);
}
public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
@@ -71,7 +71,7 @@ namespace ARMeilleure.Instructions
public static void Ret(ArmEmitterContext context)
{
- context.Return(context.BitwiseOr(GetIntOrZR(context, RegisterAlias.Lr), Const(CallFlag)));
+ context.Return(GetIntOrZR(context, RegisterAlias.Lr));
}
public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true);
@@ -96,7 +96,7 @@ namespace ARMeilleure.Instructions
if (context.CurrBlock.Next == null)
{
- context.Return(Const(op.Address + 4));
+ EmitTailContinue(context, Const(op.Address + 4));
}
}
else
@@ -105,11 +105,11 @@ namespace ARMeilleure.Instructions
EmitCondBranch(context, lblTaken, cond);
- context.Return(Const(op.Address + 4));
+ EmitTailContinue(context, Const(op.Address + 4));
context.MarkLabel(lblTaken);
- context.Return(Const(op.Immediate));
+ EmitTailContinue(context, Const(op.Immediate));
}
}
@@ -132,7 +132,7 @@ namespace ARMeilleure.Instructions
if (context.CurrBlock.Next == null)
{
- context.Return(Const(op.Address + 4));
+ EmitTailContinue(context, Const(op.Address + 4));
}
}
else
@@ -148,11 +148,11 @@ namespace ARMeilleure.Instructions
context.BranchIfFalse(lblTaken, value);
}
- context.Return(Const(op.Address + 4));
+ EmitTailContinue(context, Const(op.Address + 4));
context.MarkLabel(lblTaken);
- context.Return(Const(op.Immediate));
+ EmitTailContinue(context, Const(op.Immediate));
}
}
}
diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs
index cbb9ad5b..47233eb9 100644
--- a/ARMeilleure/Instructions/InstEmitFlow32.cs
+++ b/ARMeilleure/Instructions/InstEmitFlow32.cs
@@ -21,8 +21,7 @@ namespace ARMeilleure.Instructions
}
else
{
- context.StoreToContext();
- context.Return(Const(op.Immediate));
+ EmitTailContinue(context, Const(op.Immediate));
}
}
@@ -57,7 +56,7 @@ namespace ARMeilleure.Instructions
SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1));
}
- InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate);
+ EmitCall(context, (ulong)op.Immediate);
}
public static void Blxr(ArmEmitterContext context)
@@ -66,9 +65,8 @@ namespace ARMeilleure.Instructions
uint pc = op.GetPc();
- Operand addr = GetIntA32(context, op.Rm);
+ Operand addr = context.Copy(GetIntA32(context, op.Rm));
Operand bitOne = context.BitwiseAnd(addr, Const(1));
- addr = context.BitwiseOr(addr, Const((int)CallFlag)); // Set call flag.
bool isThumb = IsThumb(context.CurrOp);
@@ -80,16 +78,14 @@ namespace ARMeilleure.Instructions
SetFlag(context, PState.TFlag, bitOne);
- context.Return(addr); // Call.
+ EmitVirtualCall(context, addr);
}
public static void Bx(ArmEmitterContext context)
{
IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
- context.StoreToContext();
-
- EmitBxWritePc(context, GetIntA32(context, op.Rm));
+ EmitBxWritePc(context, GetIntA32(context, op.Rm), op.Rm);
}
}
}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs
index a8eb21d3..f0a81e85 100644
--- a/ARMeilleure/Instructions/InstEmitFlowHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs
@@ -2,6 +2,7 @@ using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
+using System;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
@@ -142,7 +143,29 @@ namespace ARMeilleure.Instructions
public static void EmitCall(ArmEmitterContext context, ulong immediate)
{
- context.Return(Const(immediate | CallFlag));
+ EmitJumpTableBranch(context, Const(immediate));
+ }
+
+ private static void EmitNativeCall(ArmEmitterContext context, Operand nativeContextPtr, Operand funcAddr, bool isJump = false)
+ {
+ context.StoreToContext();
+ Operand returnAddress;
+ if (isJump)
+ {
+ context.Tailcall(funcAddr, nativeContextPtr);
+ }
+ else
+ {
+ returnAddress = context.Call(funcAddr, OperandType.I64, nativeContextPtr);
+ context.LoadFromContext();
+
+ EmitContinueOrReturnCheck(context, returnAddress);
+ }
+ }
+
+ private static void EmitNativeCall(ArmEmitterContext context, Operand funcAddr, bool isJump = false)
+ {
+ EmitNativeCall(context, context.LoadArgument(OperandType.I64, 0), funcAddr, isJump);
}
public static void EmitVirtualCall(ArmEmitterContext context, Operand target)
@@ -150,37 +173,45 @@ namespace ARMeilleure.Instructions
EmitVirtualCallOrJump(context, target, isJump: false);
}
- public static void EmitVirtualJump(ArmEmitterContext context, Operand target)
+ public static void EmitVirtualJump(ArmEmitterContext context, Operand target, bool isReturn)
{
- EmitVirtualCallOrJump(context, target, isJump: true);
+ EmitVirtualCallOrJump(context, target, isJump: true, isReturn: isReturn);
}
- private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump)
+ private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump, bool isReturn = false)
{
- context.Return(context.BitwiseOr(target, Const(target.Type, (long)CallFlag)));
+ if (isReturn)
+ {
+ context.Return(target);
+ }
+ else
+ {
+ EmitJumpTableBranch(context, target, isJump);
+ }
}
- private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand retVal)
+ private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand returnAddress)
{
- // Note: The return value of the called method will be placed
- // at the Stack, the return value is always a Int64 with the
- // return address of the function. We check if the address is
- // correct, if it isn't we keep returning until we reach the dispatcher.
- ulong nextAddr = GetNextOpAddress(context.CurrOp);
+ // Note: The return value of a translated function is always an Int64 with the
+ // address execution has returned to. We expect this address to be immediately after the
+            // current instruction; if it isn't, we keep returning until we reach the dispatcher.
+ Operand nextAddr = Const(GetNextOpAddress(context.CurrOp));
- if (context.CurrBlock.Next != null)
- {
- Operand lblContinue = Label();
+ // Try to continue within this block.
+            // If the return address does not point to the next instruction, we return so the JIT can figure out what to do.
+ Operand lblContinue = Label();
- context.BranchIfTrue(lblContinue, context.ICompareEqual(retVal, Const(nextAddr)));
+ // We need to clear out the call flag for the return address before comparing it.
+ context.BranchIfTrue(lblContinue, context.ICompareEqual(context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr));
- context.Return(Const(nextAddr));
+ context.Return(returnAddress);
- context.MarkLabel(lblContinue);
- }
- else
+ context.MarkLabel(lblContinue);
+
+ if (context.CurrBlock.Next == null)
{
- context.Return(Const(nextAddr));
+ // No code following this instruction, try and find the next block and jump to it.
+ EmitTailContinue(context, nextAddr);
}
}
@@ -188,5 +219,134 @@ namespace ARMeilleure.Instructions
{
return op.Address + (ulong)op.OpCodeSizeInBytes;
}
+
+ public static void EmitTailContinue(ArmEmitterContext context, Operand address, bool allowRejit = false)
+ {
+            bool useTailContinue = true; // Option kept here as it may be useful if we need to return to managed code rather than tail call in the future (e.g. for debugging).
+ if (useTailContinue)
+ {
+ if (allowRejit)
+ {
+ address = context.BitwiseOr(address, Const(1L));
+ }
+
+ Operand fallbackAddr = context.Call(new _U64_U64(NativeInterface.GetFunctionAddress), address);
+
+ EmitNativeCall(context, fallbackAddr, true);
+ }
+ else
+ {
+ context.Return(address);
+ }
+ }
+
+ private static void EmitNativeCallWithGuestAddress(ArmEmitterContext context, Operand funcAddr, Operand guestAddress, bool isJump)
+ {
+ Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0);
+ context.Store(context.Add(nativeContextPtr, Const(NativeContext.GetCallAddressOffset())), guestAddress);
+
+ EmitNativeCall(context, nativeContextPtr, funcAddr, isJump);
+ }
+
+ private static void EmitBranchFallback(ArmEmitterContext context, Operand address, bool isJump)
+ {
+ address = context.BitwiseOr(address, Const(address.Type, (long)CallFlag)); // Set call flag.
+ Operand fallbackAddr = context.Call(new _U64_U64(NativeInterface.GetFunctionAddress), address);
+ EmitNativeCall(context, fallbackAddr, isJump);
+ }
+
+ public static void EmitDynamicTableCall(ArmEmitterContext context, Operand tableAddress, Operand address, bool isJump)
+ {
+ // Loop over elements of the dynamic table. Unrolled loop.
+
+ Operand endLabel = Label();
+ Operand fallbackLabel = Label();
+
+ Action<Operand> emitTableEntry = (Operand entrySkipLabel) =>
+ {
+ // Try to take this entry in the table if its guest address equals 0.
+ Operand gotResult = context.CompareAndSwap(tableAddress, Const(0L), address);
+
+ // Is the address ours? (either taken via CompareAndSwap (0), or what was already here)
+ context.BranchIfFalse(entrySkipLabel, context.BitwiseOr(context.ICompareEqual(gotResult, address), context.ICompareEqual(gotResult, Const(0L))));
+
+ // It's ours, so what function is it pointing to?
+ Operand targetFunctionPtr = context.Add(tableAddress, Const(8L));
+ Operand targetFunction = context.Load(OperandType.I64, targetFunctionPtr);
+
+ // Call the function.
+ // We pass in the entry address as the guest address, as the entry may need to be updated by the indirect call stub.
+ EmitNativeCallWithGuestAddress(context, targetFunction, tableAddress, isJump);
+ context.Branch(endLabel);
+ };
+
+ // Currently this uses a size of 1, as higher values inflate code size for no real benefit.
+ for (int i = 0; i < JumpTable.DynamicTableElems; i++)
+ {
+ if (i == JumpTable.DynamicTableElems - 1)
+ {
+                    emitTableEntry(fallbackLabel); // If this is the last entry, skip the extra label and table-address add.
+ }
+ else
+ {
+ Operand nextLabel = Label();
+
+ emitTableEntry(nextLabel);
+
+ context.MarkLabel(nextLabel);
+ tableAddress = context.Add(tableAddress, Const((long)JumpTable.JumpTableStride)); // Move to the next table entry.
+ }
+ }
+
+ context.MarkLabel(fallbackLabel);
+
+ EmitBranchFallback(context, address, isJump);
+
+ context.MarkLabel(endLabel);
+ }
+
+ public static void EmitJumpTableBranch(ArmEmitterContext context, Operand address, bool isJump = false)
+ {
+ if (address.Type == OperandType.I32)
+ {
+ address = context.ZeroExtend32(OperandType.I64, address);
+ }
+
+            // TODO: Constant folding. Indirect calls are slower in the best case and emit more code, so we want to avoid them when possible.
+ bool isConst = address.Kind == OperandKind.Constant;
+ long constAddr = (long)address.Value;
+
+ if (!context.HighCq)
+ {
+ // Don't emit indirect calls or jumps if we're compiling in lowCq mode.
+ // This avoids wasting space on the jump and indirect tables.
+ // Just ask the translator for the function address.
+
+ EmitBranchFallback(context, address, isJump);
+ }
+ else if (!isConst)
+ {
+                // Virtual branch/call - store the first addresses used in a small table for fast lookup.
+ int entry = context.JumpTable.ReserveDynamicEntry(isJump);
+
+ int jumpOffset = entry * JumpTable.JumpTableStride * JumpTable.DynamicTableElems;
+ Operand dynTablePtr = Const(context.JumpTable.DynamicPointer.ToInt64() + jumpOffset);
+
+ EmitDynamicTableCall(context, dynTablePtr, address, isJump);
+ }
+ else
+ {
+ int entry = context.JumpTable.ReserveTableEntry(context.BaseAddress & (~3L), constAddr, isJump);
+
+ int jumpOffset = entry * JumpTable.JumpTableStride + 8; // Offset directly to the host address.
+
+ // TODO: Relocatable jump table ptr for AOT. Would prefer a solution to patch this constant into functions as they are loaded rather than calculate at runtime.
+ Operand tableEntryPtr = Const(context.JumpTable.JumpPointer.ToInt64() + jumpOffset);
+
+ Operand funcAddr = context.Load(OperandType.I64, tableEntryPtr);
+
+ EmitNativeCallWithGuestAddress(context, funcAddr, address, isJump); // Call the function directly. If it's not present yet, this will call the direct call stub.
+ }
+ }
}
}
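Note: the probe emitted by EmitDynamicTableCall has a simple runtime model: each 16-byte entry is a (guest address, host address) pair, and claiming an entry is a compare-and-swap on the guest half. A C# reference model of one probe step, under the assumed layout (the real logic is emitted as IR, not written like this):

    using System.Threading;

    static unsafe class DynamicTableModel
    {
        // entry[0] = guest address, entry[1] = host function pointer.
        public static long Probe(long* entry, long guestAddress)
        {
            // Claim the slot if it is free; we get back whatever was there before.
            long prev = Interlocked.CompareExchange(ref entry[0], guestAddress, 0L);

            if (prev == 0L || prev == guestAddress)
            {
                // Ours: the host pointer starts out as the indirect-call stub and is
                // patched to the real function once it is translated in high quality.
                return entry[1];
            }

            return 0; // Not ours; the caller probes the next entry or falls back.
        }
    }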
diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs
index f5495c66..a4227543 100644
--- a/ARMeilleure/Instructions/InstEmitHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitHelper.cs
@@ -144,22 +144,34 @@ namespace ARMeilleure.Instructions
}
}
- public static void EmitBxWritePc(ArmEmitterContext context, Operand pc)
+ public static bool IsA32Return(ArmEmitterContext context)
{
+ switch (context.CurrOp)
+ {
+ case IOpCode32MemMult op:
+ return true; // Setting PC using LDM is nearly always a return.
+ case OpCode32AluRsImm op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32AluRsReg op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32AluReg op:
+ return op.Rm == RegisterAlias.Aarch32Lr;
+ case OpCode32Mem op:
+ return op.Rn == RegisterAlias.Aarch32Sp && op.WBack && !op.Index; // Setting PC to an address stored on the stack is nearly always a return.
+ }
+ return false;
+ }
+
+ public static void EmitBxWritePc(ArmEmitterContext context, Operand pc, int sourceRegister = 0)
+ {
+ bool isReturn = sourceRegister == RegisterAlias.Aarch32Lr || IsA32Return(context);
Operand mode = context.BitwiseAnd(pc, Const(1));
SetFlag(context, PState.TFlag, mode);
- Operand lblArmMode = Label();
-
- context.BranchIfTrue(lblArmMode, mode);
-
- // Make this count as a call, the translator will ignore the low bit for the address.
- context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(pc, Const((int)InstEmitFlowHelper.CallFlag))));
-
- context.MarkLabel(lblArmMode);
+ Operand addr = context.ConditionalSelect(mode, context.BitwiseOr(pc, Const((int)InstEmitFlowHelper.CallFlag)), context.BitwiseAnd(pc, Const(~3)));
- context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(pc, Const(~3)), Const((int)InstEmitFlowHelper.CallFlag))));
+ InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn);
}
public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex)
diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
index 70861d16..e1dec331 100644
--- a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -51,7 +51,7 @@ namespace ARMeilleure.Instructions
EmitReadInt(context, address, rt, size);
}
- if (!isSimd)
+ if (!isSimd && !(context.CurrOp is OpCode32 && rt == State.RegisterAlias.Aarch32Pc))
{
Operand value = GetInt(context, rt);
diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs
index 988e86bd..4514c0da 100644
--- a/ARMeilleure/Instructions/NativeInterface.cs
+++ b/ARMeilleure/Instructions/NativeInterface.cs
@@ -1,6 +1,8 @@
using ARMeilleure.Memory;
using ARMeilleure.State;
+using ARMeilleure.Translation;
using System;
+using System.Runtime.InteropServices;
namespace ARMeilleure.Instructions
{
@@ -10,17 +12,19 @@ namespace ARMeilleure.Instructions
private class ThreadContext
{
- public ExecutionContext Context { get; }
- public MemoryManager Memory { get; }
+ public ExecutionContext Context { get; }
+ public MemoryManager Memory { get; }
+ public Translator Translator { get; }
public ulong ExclusiveAddress { get; set; }
public ulong ExclusiveValueLow { get; set; }
public ulong ExclusiveValueHigh { get; set; }
- public ThreadContext(ExecutionContext context, MemoryManager memory)
+ public ThreadContext(ExecutionContext context, MemoryManager memory, Translator translator)
{
- Context = context;
- Memory = memory;
+ Context = context;
+ Memory = memory;
+ Translator = translator;
ExclusiveAddress = ulong.MaxValue;
}
@@ -29,9 +33,9 @@ namespace ARMeilleure.Instructions
[ThreadStatic]
private static ThreadContext _context;
- public static void RegisterThread(ExecutionContext context, MemoryManager memory)
+ public static void RegisterThread(ExecutionContext context, MemoryManager memory, Translator translator)
{
- _context = new ThreadContext(context, memory);
+ _context = new ThreadContext(context, memory, translator);
}
public static void UnregisterThread()
@@ -381,18 +385,39 @@ namespace ARMeilleure.Instructions
return address & ~((4UL << ErgSizeLog2) - 1);
}
+ public static ulong GetFunctionAddress(ulong address)
+ {
+ TranslatedFunction function = _context.Translator.GetOrTranslate(address, GetContext().ExecutionMode);
+ return (ulong)function.GetPointer().ToInt64();
+ }
+
+ public static ulong GetIndirectFunctionAddress(ulong address, ulong entryAddress)
+ {
+ TranslatedFunction function = _context.Translator.GetOrTranslate(address, GetContext().ExecutionMode);
+ ulong ptr = (ulong)function.GetPointer().ToInt64();
+ if (function.HighCq)
+ {
+ // Rewrite the host function address in the table to point to the highCq function.
+ Marshal.WriteInt64((IntPtr)entryAddress, 8, (long)ptr);
+ }
+ return ptr;
+ }
+
public static void ClearExclusive()
{
_context.ExclusiveAddress = ulong.MaxValue;
}
- public static void CheckSynchronization()
+ public static bool CheckSynchronization()
{
Statistics.PauseTimer();
- GetContext().CheckInterrupt();
+ ExecutionContext context = GetContext();
+ context.CheckInterrupt();
Statistics.ResumeTimer();
+
+ return context.Running;
}
public static ExecutionContext GetContext()
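Note: GetIndirectFunctionAddress patches the second quadword of the claimed 16-byte table entry (offset 8) so later probes call the translated function instead of the stub. The Marshal.WriteInt64 call above is equivalent to this unsafe store (illustrative only):

    static unsafe void PatchHostAddress(ulong entryAddress, ulong hostFunction)
    {
        *(ulong*)(entryAddress + 8) = hostFunction; // Second quadword: host address.
    }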
diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs
index 4c4ecb8f..d1ce1aa3 100644
--- a/ARMeilleure/IntermediateRepresentation/Instruction.cs
+++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs
@@ -12,7 +12,7 @@ namespace ARMeilleure.IntermediateRepresentation
BranchIfTrue,
ByteSwap,
Call,
- CompareAndSwap128,
+ CompareAndSwap,
CompareEqual,
CompareGreater,
CompareGreaterOrEqual,
@@ -52,6 +52,7 @@ namespace ARMeilleure.IntermediateRepresentation
Store16,
Store8,
Subtract,
+ Tailcall,
VectorCreateScalar,
VectorExtract,
VectorExtract16,
diff --git a/ARMeilleure/Memory/MemoryManagement.cs b/ARMeilleure/Memory/MemoryManagement.cs
index e299ae49..ba62f8e7 100644
--- a/ARMeilleure/Memory/MemoryManagement.cs
+++ b/ARMeilleure/Memory/MemoryManagement.cs
@@ -44,6 +44,25 @@ namespace ARMeilleure.Memory
}
}
+ public static bool Commit(IntPtr address, ulong size)
+ {
+ if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ {
+ IntPtr sizeNint = new IntPtr((long)size);
+
+ return MemoryManagementWindows.Commit(address, sizeNint);
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
+ RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+ {
+ return MemoryManagementUnix.Commit(address, size);
+ }
+ else
+ {
+ throw new PlatformNotSupportedException();
+ }
+ }
+
public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission)
{
bool result;
@@ -70,6 +89,25 @@ namespace ARMeilleure.Memory
}
}
+ public static IntPtr Reserve(ulong size)
+ {
+ if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ {
+ IntPtr sizeNint = new IntPtr((long)size);
+
+ return MemoryManagementWindows.Reserve(sizeNint);
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
+ RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+ {
+ return MemoryManagementUnix.Reserve(size);
+ }
+ else
+ {
+ throw new PlatformNotSupportedException();
+ }
+ }
+
public static bool Free(IntPtr address)
{
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
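Note: Reserve claims address space without backing it with memory, and Commit later backs sub-ranges on demand; ReservedRegion (added below) wraps exactly this pattern. Hypothetical usage:

    IntPtr region = MemoryManagement.Reserve(512UL * 1024 * 1024); // Address space only.

    if (!MemoryManagement.Commit(region, 64 * 1024)) // Back the first 64 KiB before use.
    {
        throw new OutOfMemoryException();
    }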
diff --git a/ARMeilleure/Memory/MemoryManagementUnix.cs b/ARMeilleure/Memory/MemoryManagementUnix.cs
index 3331fb42..e9b29608 100644
--- a/ARMeilleure/Memory/MemoryManagementUnix.cs
+++ b/ARMeilleure/Memory/MemoryManagementUnix.cs
@@ -30,6 +30,11 @@ namespace ARMeilleure.Memory
return ptr;
}
+ public static bool Commit(IntPtr address, ulong size)
+ {
+ return Syscall.mprotect(address, size, MmapProts.PROT_READ | MmapProts.PROT_WRITE) == 0;
+ }
+
public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection)
{
MmapProts prot = GetProtection(protection);
@@ -37,6 +42,24 @@ namespace ARMeilleure.Memory
return Syscall.mprotect(address, size, prot) == 0;
}
+ public static IntPtr Reserve(ulong size)
+ {
+ ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+ const MmapProts prot = MmapProts.PROT_NONE;
+
+ const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;
+
+ IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);
+
+ if (ptr == IntPtr.Zero)
+ {
+ throw new OutOfMemoryException();
+ }
+
+ return ptr;
+ }
+
private static MmapProts GetProtection(Memory.MemoryProtection protection)
{
switch (protection)
diff --git a/ARMeilleure/Memory/MemoryManagementWindows.cs b/ARMeilleure/Memory/MemoryManagementWindows.cs
index ae64b5c6..a9455063 100644
--- a/ARMeilleure/Memory/MemoryManagementWindows.cs
+++ b/ARMeilleure/Memory/MemoryManagementWindows.cs
@@ -89,6 +89,15 @@ namespace ARMeilleure.Memory
return ptr;
}
+ public static bool Commit(IntPtr location, IntPtr size)
+ {
+ const AllocationType flags = AllocationType.Commit;
+
+ IntPtr ptr = VirtualAlloc(location, size, flags, MemoryProtection.ReadWrite);
+
+ return ptr != IntPtr.Zero;
+ }
+
public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection)
{
MemoryProtection prot = GetProtection(protection);
@@ -96,6 +105,20 @@ namespace ARMeilleure.Memory
return VirtualProtect(address, size, prot, out _);
}
+ public static IntPtr Reserve(IntPtr size)
+ {
+ const AllocationType flags = AllocationType.Reserve;
+
+ IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
+
+ if (ptr == IntPtr.Zero)
+ {
+ throw new OutOfMemoryException();
+ }
+
+ return ptr;
+ }
+
private static MemoryProtection GetProtection(Memory.MemoryProtection protection)
{
switch (protection)
diff --git a/ARMeilleure/Memory/MemoryManagerPal.cs b/ARMeilleure/Memory/MemoryManagerPal.cs
index 64191a0a..66c43642 100644
--- a/ARMeilleure/Memory/MemoryManagerPal.cs
+++ b/ARMeilleure/Memory/MemoryManagerPal.cs
@@ -53,7 +53,7 @@ namespace ARMeilleure.Memory
Operand expected = context.LoadArgument(OperandType.V128, 1);
Operand desired = context.LoadArgument(OperandType.V128, 2);
- Operand result = context.CompareAndSwap128(address, expected, desired);
+ Operand result = context.CompareAndSwap(address, expected, desired);
context.Return(result);
diff --git a/ARMeilleure/Memory/ReservedRegion.cs b/ARMeilleure/Memory/ReservedRegion.cs
new file mode 100644
index 00000000..521019ad
--- /dev/null
+++ b/ARMeilleure/Memory/ReservedRegion.cs
@@ -0,0 +1,53 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace ARMeilleure.Memory
+{
+ class ReservedRegion
+ {
+ private const int DefaultGranularity = 65536; // Mapping granularity in Windows.
+
+ public IntPtr Pointer { get; }
+
+ private ulong _maxSize;
+ private ulong _sizeGranularity;
+ private ulong _currentSize;
+
+ public ReservedRegion(ulong maxSize, ulong granularity = 0)
+ {
+ if (granularity == 0)
+ {
+ granularity = DefaultGranularity;
+ }
+
+ Pointer = MemoryManagement.Reserve(maxSize);
+ _maxSize = maxSize;
+ _sizeGranularity = granularity;
+ _currentSize = 0;
+ }
+
+ public void ExpandIfNeeded(ulong desiredSize)
+ {
+ if (desiredSize > _maxSize)
+ {
+ throw new OutOfMemoryException();
+ }
+
+ if (desiredSize > _currentSize)
+ {
+ // Lock, and then check again. We only want to commit once.
+ lock (this)
+ {
+ if (desiredSize >= _currentSize)
+ {
+ ulong overflowBytes = desiredSize - _currentSize;
+ ulong moreToCommit = (((_sizeGranularity - 1) + overflowBytes) / _sizeGranularity) * _sizeGranularity; // Round up.
+ MemoryManagement.Commit(new IntPtr((long)Pointer + (long)_currentSize), moreToCommit);
+ _currentSize += moreToCommit;
+ }
+ }
+ }
+ }
+ }
+}
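Note: ExpandIfNeeded rounds the shortfall up to the commit granularity, so commits happen in a few large steps rather than per allocation. With the default 64 KiB granularity (sizes here are hypothetical):

    ReservedRegion region = new ReservedRegion(128UL * 1024 * 1024);

    region.ExpandIfNeeded(100);    // Commits 64 KiB: ((65535 + 100) / 65536) * 65536.
    region.ExpandIfNeeded(65536);  // Within the committed range; no further commit.
    region.ExpandIfNeeded(65537);  // Commits the next 64 KiB chunk.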
diff --git a/ARMeilleure/State/ExecutionContext.cs b/ARMeilleure/State/ExecutionContext.cs
index 482665db..57a05dbf 100644
--- a/ARMeilleure/State/ExecutionContext.cs
+++ b/ARMeilleure/State/ExecutionContext.cs
@@ -5,7 +5,7 @@ namespace ARMeilleure.State
{
public class ExecutionContext
{
- private const int MinCountForCheck = 40000;
+ private const int MinCountForCheck = 4000;
private NativeContext _nativeContext;
@@ -57,7 +57,7 @@ namespace ARMeilleure.State
}
}
- public bool Running { get; set; }
+ internal bool Running { get; private set; }
public event EventHandler<EventArgs> Interrupt;
public event EventHandler<InstExceptionEventArgs> Break;
@@ -126,6 +126,12 @@ namespace ARMeilleure.State
Undefined?.Invoke(this, new InstUndefinedEventArgs(address, opCode));
}
+ public void StopRunning()
+ {
+ Running = false;
+ _nativeContext.SetCounter(0);
+ }
+
public void Dispose()
{
_nativeContext.Dispose();
diff --git a/ARMeilleure/State/NativeContext.cs b/ARMeilleure/State/NativeContext.cs
index eb54505c..0ab9a3fd 100644
--- a/ARMeilleure/State/NativeContext.cs
+++ b/ARMeilleure/State/NativeContext.cs
@@ -10,7 +10,7 @@ namespace ARMeilleure.State
private const int IntSize = 8;
private const int VecSize = 16;
private const int FlagSize = 4;
- private const int ExtraSize = 4;
+ private const int ExtraSize = 8;
private const int TotalSize = RegisterConsts.IntRegsCount * IntSize +
RegisterConsts.VecRegsCount * VecSize +
@@ -183,6 +183,14 @@ namespace ARMeilleure.State
RegisterConsts.FpFlagsCount * FlagSize;
}
+ public static int GetCallAddressOffset()
+ {
+ return RegisterConsts.IntRegsCount * IntSize +
+ RegisterConsts.VecRegsCount * VecSize +
+ RegisterConsts.FlagsCount * FlagSize +
+ RegisterConsts.FpFlagsCount * FlagSize + 4;
+ }
+
public void Dispose()
{
MemoryManagement.Free(BasePtr);
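Note: ExtraSize grows from 4 to 8 bytes to make room for the guest call address next to the counter; the call stubs read and write it through GetCallAddressOffset(). Layout of that tail area, derived from the constants above (the counter offset is presumably what the preceding getter returns):

    // base = IntRegsCount*8 + VecRegsCount*16 + (FlagsCount + FpFlagsCount)*4
    // base + 0 : 4-byte counter (see SetCounter in ExecutionContext)
    // base + 4 : guest call address, accessed via GetCallAddressOffset()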
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs
index d35e985e..d1a2c92d 100644
--- a/ARMeilleure/Translation/ArmEmitterContext.cs
+++ b/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -41,10 +41,19 @@ namespace ARMeilleure.Translation
public Aarch32Mode Mode { get; }
- public ArmEmitterContext(MemoryManager memory, Aarch32Mode mode)
+ public JumpTable JumpTable { get; }
+
+ public long BaseAddress { get; }
+
+ public bool HighCq { get; }
+
+ public ArmEmitterContext(MemoryManager memory, JumpTable jumpTable, long baseAddress, bool highCq, Aarch32Mode mode)
{
- Memory = memory;
- Mode = mode;
+ Memory = memory;
+ JumpTable = jumpTable;
+ BaseAddress = baseAddress;
+ HighCq = highCq;
+ Mode = mode;
_labels = new Dictionary<ulong, Operand>();
}
diff --git a/ARMeilleure/Translation/DirectCallStubs.cs b/ARMeilleure/Translation/DirectCallStubs.cs
new file mode 100644
index 00000000..e6e87b2b
--- /dev/null
+++ b/ARMeilleure/Translation/DirectCallStubs.cs
@@ -0,0 +1,131 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+using System.Runtime.InteropServices;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+ static class DirectCallStubs
+ {
+ private delegate long GuestFunction(IntPtr nativeContextPtr);
+
+ private static GuestFunction _directCallStub;
+ private static GuestFunction _directTailCallStub;
+ private static GuestFunction _indirectCallStub;
+ private static GuestFunction _indirectTailCallStub;
+
+ private static object _lock;
+ private static bool _initialized;
+
+ static DirectCallStubs()
+ {
+ _lock = new object();
+ }
+
+ public static void InitializeStubs()
+ {
+ if (_initialized) return;
+ lock (_lock)
+ {
+ if (_initialized) return;
+ _directCallStub = GenerateDirectCallStub(false);
+ _directTailCallStub = GenerateDirectCallStub(true);
+ _indirectCallStub = GenerateIndirectCallStub(false);
+ _indirectTailCallStub = GenerateIndirectCallStub(true);
+ _initialized = true;
+ }
+ }
+
+ public static IntPtr DirectCallStub(bool tailCall)
+ {
+ return Marshal.GetFunctionPointerForDelegate(tailCall ? _directTailCallStub : _directCallStub);
+ }
+
+ public static IntPtr IndirectCallStub(bool tailCall)
+ {
+ return Marshal.GetFunctionPointerForDelegate(tailCall ? _indirectTailCallStub : _indirectCallStub);
+ }
+
+ private static void EmitCall(EmitterContext context, Operand address, bool tailCall)
+ {
+ if (tailCall)
+ {
+ context.Tailcall(address, context.LoadArgument(OperandType.I64, 0));
+ }
+ else
+ {
+ context.Return(context.Call(address, OperandType.I64, context.LoadArgument(OperandType.I64, 0)));
+ }
+ }
+
+ /// <summary>
+ /// Generates a stub that is used to find function addresses. Used for direct calls when their jump table does not have the host address yet.
+ /// Takes a NativeContext like a translated guest function, and extracts the target address from the NativeContext.
+ /// When the target function is compiled in highCq, all table entries are updated to point to that function instead of this stub by the translator.
+ /// </summary>
+ private static GuestFunction GenerateDirectCallStub(bool tailCall)
+ {
+ EmitterContext context = new EmitterContext();
+
+ Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0);
+
+ Operand address = context.Load(OperandType.I64, context.Add(nativeContextPtr, Const((long)NativeContext.GetCallAddressOffset())));
+
+ address = context.BitwiseOr(address, Const(address.Type, 1)); // Set call flag.
+ Operand functionAddr = context.Call(new _U64_U64(NativeInterface.GetFunctionAddress), address);
+ EmitCall(context, functionAddr, tailCall);
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[]
+ {
+ OperandType.I64
+ };
+
+ return Compiler.Compile<GuestFunction>(
+ cfg,
+ argTypes,
+ OperandType.I64,
+ CompilerOptions.HighCq);
+ }
+
+ /// <summary>
+ /// Generates a stub that is used to find function addresses and add them to an indirect table.
+        /// Used for indirect call entries (already claimed) when their jump table does not have the host address yet.
+ /// Takes a NativeContext like a translated guest function, and extracts the target indirect table entry from the NativeContext.
+ /// If the function we find is highCq, the entry in the table is updated to point to that function rather than this stub.
+ /// </summary>
+ private static GuestFunction GenerateIndirectCallStub(bool tailCall)
+ {
+ EmitterContext context = new EmitterContext();
+
+ Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0);
+
+ Operand entryAddress = context.Load(OperandType.I64, context.Add(nativeContextPtr, Const((long)NativeContext.GetCallAddressOffset())));
+ Operand address = context.Load(OperandType.I64, entryAddress);
+
+ // We need to find the missing function. If the function is HighCq, then it replaces this stub in the indirect table.
+ // Either way, we call it afterwards.
+ Operand functionAddr = context.Call(new _U64_U64_U64(NativeInterface.GetIndirectFunctionAddress), address, entryAddress);
+
+ // Call and save the function.
+ EmitCall(context, functionAddr, tailCall);
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[]
+ {
+ OperandType.I64
+ };
+
+ return Compiler.Compile<GuestFunction>(
+ cfg,
+ argTypes,
+ OperandType.I64,
+ CompilerOptions.HighCq);
+ }
+ }
+}
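Note: stub generation is guarded by a double-checked lock, so InitializeStubs is safe to call eagerly from multiple threads; consumers then fetch raw function pointers to seed jump table entries. Hypothetical usage:

    DirectCallStubs.InitializeStubs();

    IntPtr direct   = DirectCallStubs.DirectCallStub(tailCall: false);
    IntPtr indirect = DirectCallStubs.IndirectCallStub(tailCall: true);
    // These pointers fill table entries until translated host code replaces them.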
diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs
index a125a715..a11d25a6 100644
--- a/ARMeilleure/Translation/EmitterContext.cs
+++ b/ARMeilleure/Translation/EmitterContext.cs
@@ -143,9 +143,22 @@ namespace ARMeilleure.Translation
}
}
- public Operand CompareAndSwap128(Operand address, Operand expected, Operand desired)
+ public void Tailcall(Operand address, params Operand[] callArgs)
{
- return Add(Instruction.CompareAndSwap128, Local(OperandType.V128), address, expected, desired);
+ Operand[] args = new Operand[callArgs.Length + 1];
+
+ args[0] = address;
+
+ Array.Copy(callArgs, 0, args, 1, callArgs.Length);
+
+ Add(Instruction.Tailcall, null, args);
+
+ _needsNewBlock = true;
+ }
+
+ public Operand CompareAndSwap(Operand address, Operand expected, Operand desired)
+ {
+ return Add(Instruction.CompareAndSwap, Local(desired.Type), address, expected, desired);
}
public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3)
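
The CompareAndSwap emitter call above presumably follows the conventional compare-exchange contract (as with x86 cmpxchg): it yields the value that was previously stored at the address. As a rough managed analogue, not the emitted code:

using System;
using System.Threading;

class CasDemo
{
    private static long _cell = 5;

    static void Main()
    {
        // Arguments are (location, desired, comparand); the return value is what
        // the cell held before the operation.
        long seen = Interlocked.CompareExchange(ref _cell, 9, 5);

        Console.WriteLine($"old value: {seen}, cell now: {_cell}"); // old value: 5, cell now: 9
    }
}
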
diff --git a/ARMeilleure/Translation/JitCache.cs b/ARMeilleure/Translation/JitCache.cs
index 73f04a96..b004cc22 100644
--- a/ARMeilleure/Translation/JitCache.cs
+++ b/ARMeilleure/Translation/JitCache.cs
@@ -13,9 +13,11 @@ namespace ARMeilleure.Translation
private const int CodeAlignment = 4; // Bytes
- private const int CacheSize = 512 * 1024 * 1024;
+ private const int CacheSize = 2047 * 1024 * 1024;
- private static IntPtr _basePointer;
+ private static ReservedRegion _jitRegion;
+
+ private static IntPtr _basePointer => _jitRegion.Pointer;
private static int _offset;
@@ -25,10 +27,11 @@ namespace ARMeilleure.Translation
static JitCache()
{
- _basePointer = MemoryManagement.Allocate(CacheSize);
+ _jitRegion = new ReservedRegion(CacheSize);
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
+ _jitRegion.ExpandIfNeeded(PageSize);
JitUnwindWindows.InstallFunctionTableHandler(_basePointer, CacheSize);
// The first page is used for the table based SEH structs.
@@ -97,6 +100,8 @@ namespace ARMeilleure.Translation
_offset += codeSize;
+ _jitRegion.ExpandIfNeeded((ulong)_offset);
+
if ((ulong)(uint)_offset > CacheSize)
{
throw new OutOfMemoryException();
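
With the switch to ReservedRegion, the 2047 MB CacheSize is only reserved address space; ExpandIfNeeded commits memory lazily as _offset grows. A simplified sketch of that reserve-then-commit pattern (the growth policy and the commented-out OS calls are assumptions, not the real ReservedRegion):

using System;

class ReservedRegionSketch
{
    private readonly ulong _maxSize;
    private ulong _committed;

    public ReservedRegionSketch(ulong maxSize)
    {
        _maxSize = maxSize;
        // The real class would reserve the address range here without backing it.
    }

    public void ExpandIfNeeded(ulong requiredSize)
    {
        if (requiredSize <= _committed)
        {
            return;
        }

        // Assumed policy: commit in doubling steps, capped at the reservation size.
        ulong newSize = Math.Min(_maxSize, Math.Max(requiredSize, _committed * 2));
        // The real class would commit the [_committed, newSize) range here.
        _committed = newSize;
    }

    static void Main()
    {
        var region = new ReservedRegionSketch(2047UL * 1024 * 1024);

        region.ExpandIfNeeded(4096);   // first use commits the first pages
        region.ExpandIfNeeded(100000); // later growth commits on demand

        Console.WriteLine($"committed: {region._committed} bytes");
    }
}
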
diff --git a/ARMeilleure/Translation/JumpTable.cs b/ARMeilleure/Translation/JumpTable.cs
new file mode 100644
index 00000000..5cad2944
--- /dev/null
+++ b/ARMeilleure/Translation/JumpTable.cs
@@ -0,0 +1,149 @@
+using ARMeilleure.Memory;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+namespace ARMeilleure.Translation
+{
+ class JumpTable
+ {
+ public static JumpTable Instance { get; }
+
+ static JumpTable()
+ {
+ Instance = new JumpTable();
+ }
+
+ // The jump table is a block of (guestAddress, hostAddress) function mappings.
+ // Each entry corresponds to one branch in a JIT compiled function, and is
+ // reserved specifically for that call.
+ // The _dependants dictionary is used to update an entry's hostAddress whenever its target function changes.
+
+ public const int JumpTableStride = 16; // 8 byte guest address, 8 byte host address
+
+ private const int JumpTableSize = 1048576;
+
+ private const int JumpTableByteSize = JumpTableSize * JumpTableStride;
+
+ // The dynamic table is also a block of (guestAddress, hostAddress) function mappings.
+ // The main difference is that indirect calls and jumps reserve _multiple_ entries in the table.
+ // These start out as all 0. When an indirect call is made, it tries to find the guest address in the table.
+
+ // If we get to an empty entry, we claim it by writing the guest address of the call we want.
+
+ // If we get to a guestAddress that matches our own (or we just claimed it), the hostAddress is read.
+ // If it is non-zero, we immediately branch or call the host function.
+ // If it is 0, NativeInterface is called to find the rejitted address of the call.
+ // If none is found, the hostAddress entry stays at 0. Otherwise, the new address is placed in the entry.
+
+ // If the table size is exhausted and we didn't find our desired address, we fall back to requesting
+ // the function from the JIT.
+
+ private const int DynamicTableSize = 1048576;
+
+ public const int DynamicTableElems = 1;
+
+ public const int DynamicTableStride = DynamicTableElems * JumpTableStride;
+
+ private const int DynamicTableByteSize = DynamicTableSize * JumpTableStride * DynamicTableElems;
+
+ private int _tableEnd = 0;
+ private int _dynTableEnd = 0;
+
+ private ConcurrentDictionary<ulong, TranslatedFunction> _targets;
+ private ConcurrentDictionary<ulong, LinkedList<int>> _dependants; // TODO: Attach to TranslatedFunction or a wrapper class.
+
+ private ReservedRegion _jumpRegion;
+ private ReservedRegion _dynamicRegion;
+ public IntPtr JumpPointer => _jumpRegion.Pointer;
+ public IntPtr DynamicPointer => _dynamicRegion.Pointer;
+
+ public JumpTable()
+ {
+ _jumpRegion = new ReservedRegion(JumpTableByteSize);
+ _dynamicRegion = new ReservedRegion(DynamicTableByteSize);
+
+ _targets = new ConcurrentDictionary<ulong, TranslatedFunction>();
+ _dependants = new ConcurrentDictionary<ulong, LinkedList<int>>();
+ }
+
+ public void RegisterFunction(ulong address, TranslatedFunction func) {
+ address &= ~3UL;
+ _targets.AddOrUpdate(address, func, (key, oldFunc) => func);
+ long funcPtr = func.GetPointer().ToInt64();
+
+ // Update all jump table entries that target this address.
+ LinkedList<int> myDependants;
+ if (_dependants.TryGetValue(address, out myDependants))
+ {
+ lock (myDependants)
+ {
+ foreach (var entry in myDependants)
+ {
+ IntPtr addr = _jumpRegion.Pointer + entry * JumpTableStride;
+ Marshal.WriteInt64(addr, 8, funcPtr);
+ }
+ }
+ }
+ }
+
+ public int ReserveDynamicEntry(bool isJump)
+ {
+ int entry = Interlocked.Increment(ref _dynTableEnd);
+ if (entry >= DynamicTableSize)
+ {
+ throw new OutOfMemoryException("JIT Dynamic Jump Table exhausted.");
+ }
+
+ _dynamicRegion.ExpandIfNeeded((ulong)((entry + 1) * DynamicTableStride));
+
+ // Initialize all host function pointers to the indirect call stub.
+
+ IntPtr addr = _dynamicRegion.Pointer + entry * DynamicTableStride;
+ long stubPtr = (long)DirectCallStubs.IndirectCallStub(isJump);
+
+ for (int i = 0; i < DynamicTableElems; i++)
+ {
+ Marshal.WriteInt64(addr, i * JumpTableStride + 8, stubPtr);
+ }
+
+ return entry;
+ }
+
+ public int ReserveTableEntry(long ownerAddress, long address, bool isJump)
+ {
+ int entry = Interlocked.Increment(ref _tableEnd);
+ if (entry >= JumpTableSize)
+ {
+ throw new OutOfMemoryException("JIT Direct Jump Table exhausted.");
+ }
+
+ _jumpRegion.ExpandIfNeeded((ulong)((entry + 1) * JumpTableStride));
+
+ // Has the address already been registered? If so, put the function address in the jump table.
+ // If not, it will point to the direct call stub.
+ long value = (long)DirectCallStubs.DirectCallStub(isJump);
+ TranslatedFunction func;
+ if (_targets.TryGetValue((ulong)address, out func))
+ {
+ value = func.GetPointer().ToInt64();
+ }
+
+ // Make sure changes to the function at the target address update this jump table entry.
+ LinkedList<int> targetDependants = _dependants.GetOrAdd((ulong)address, (addr) => new LinkedList<int>());
+ lock (targetDependants)
+ {
+ targetDependants.AddLast(entry);
+ }
+
+ IntPtr addr = _jumpRegion.Pointer + entry * JumpTableStride;
+
+ Marshal.WriteInt64(addr, 0, address);
+ Marshal.WriteInt64(addr, 8, value);
+
+ return entry;
+ }
+ }
+}
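
The dynamic-table claim protocol described in the comments at the top of JumpTable.cs can be modelled with a managed array standing in for the table region. This sketch illustrates the protocol only, not the code the JIT emits; entries are (guestAddress, hostAddress) pairs, with 0 meaning unclaimed or unresolved:

using System;
using System.Threading;

class DynamicTableSketch
{
    // Even index: guest address; odd index: host address (16 bytes per entry in the real table).
    private readonly long[] _table;

    public DynamicTableSketch(int entries)
    {
        _table = new long[entries * 2];
    }

    public long Lookup(long guestAddress)
    {
        for (int i = 0; i < _table.Length; i += 2)
        {
            // Claim an empty slot atomically, or recognize one we claimed earlier.
            long seen = Interlocked.CompareExchange(ref _table[i], guestAddress, 0);

            if (seen == 0 || seen == guestAddress)
            {
                // 0 here means "not resolved yet": the real code falls back to the stub.
                return Volatile.Read(ref _table[i + 1]);
            }
        }

        return 0; // Table exhausted: request the function from the JIT.
    }

    static void Main()
    {
        var table = new DynamicTableSketch(4);

        Console.WriteLine(table.Lookup(0x1000)); // 0: slot claimed, host not resolved yet
    }
}
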
diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs
index 06069cf8..af01aaab 100644
--- a/ARMeilleure/Translation/TranslatedFunction.cs
+++ b/ARMeilleure/Translation/TranslatedFunction.cs
@@ -1,3 +1,5 @@
+using System;
+using System.Runtime.InteropServices;
using System.Threading;
namespace ARMeilleure.Translation
@@ -11,6 +13,8 @@ namespace ARMeilleure.Translation
private bool _rejit;
private int _callCount;
+ public bool HighCq => !_rejit;
+
public TranslatedFunction(GuestFunction func, bool rejit)
{
_func = func;
@@ -26,5 +30,10 @@ namespace ARMeilleure.Translation
{
return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit;
}
+
+ public IntPtr GetPointer()
+ {
+ return Marshal.GetFunctionPointerForDelegate(_func);
+ }
}
}
\ No newline at end of file
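
GetPointer is what allows the jump table to store a raw host entry point for a managed GuestFunction delegate. A standalone sketch with a made-up delegate type; note that the delegate must stay reachable while the pointer is in use, which TranslatedFunction ensures by holding _func:

using System;
using System.Runtime.InteropServices;

class PointerDemo
{
    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    delegate long DemoFunc(long x);

    static long Double(long x) => x * 2;

    static void Main()
    {
        DemoFunc del = Double;
        IntPtr ptr = Marshal.GetFunctionPointerForDelegate(del);

        Console.WriteLine($"native entry point: 0x{ptr.ToInt64():X}");

        GC.KeepAlive(del); // keep the delegate alive while the pointer is in use
    }
}
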
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
index 3008303e..9d534d58 100644
--- a/ARMeilleure/Translation/Translator.cs
+++ b/ARMeilleure/Translation/Translator.cs
@@ -16,10 +16,14 @@ namespace ARMeilleure.Translation
{
private const ulong CallFlag = InstEmitFlowHelper.CallFlag;
+ private const bool AlwaysTranslateFunctions = true; // If false, only translates a single block for lowCq.
+
private MemoryManager _memory;
private ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
+ private JumpTable _jumpTable;
+
private PriorityQueue<RejitRequest> _backgroundQueue;
private AutoResetEvent _backgroundTranslatorEvent;
@@ -32,9 +36,13 @@ namespace ARMeilleure.Translation
_funcs = new ConcurrentDictionary<ulong, TranslatedFunction>();
+ _jumpTable = JumpTable.Instance;
+
_backgroundQueue = new PriorityQueue<RejitRequest>(2);
_backgroundTranslatorEvent = new AutoResetEvent(false);
+
+ DirectCallStubs.InitializeStubs();
}
private void TranslateQueuedSubs()
@@ -46,30 +54,42 @@ namespace ARMeilleure.Translation
TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true);
_funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => func);
+ _jumpTable.RegisterFunction(request.Address, func);
}
else
{
_backgroundTranslatorEvent.WaitOne();
}
}
+ _backgroundTranslatorEvent.Set(); // Wake up any other background translator threads, to encourage them to exit.
}
public void Execute(State.ExecutionContext context, ulong address)
{
if (Interlocked.Increment(ref _threadCount) == 1)
{
- Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs)
+ // Simple heuristic, should be user configurable in the future (1 for 4 cores/HT or fewer, 2 for 6 cores+HT, etc.).
+ // All threads are normal priority except for the last, which runs at low priority and fills as much of the last core as the OS allows.
+ // If we only have one rejit thread, it should be normal priority, as highCq code is performance critical.
+ // TODO: Use physical cores rather than logical. This only really makes sense for processors with hyperthreading. Requires OS-specific code.
+ int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
+ int threadCount = Math.Min(3, unboundedThreadCount);
+ for (int i = 0; i < threadCount; i++)
{
- Name = "CPU.BackgroundTranslatorThread",
- Priority = ThreadPriority.Lowest
- };
+ bool last = i != 0 && i == unboundedThreadCount - 1;
+ Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs)
+ {
+ Name = "CPU.BackgroundTranslatorThread." + i,
+ Priority = last ? ThreadPriority.Lowest : ThreadPriority.Normal
+ };
- backgroundTranslatorThread.Start();
+ backgroundTranslatorThread.Start();
+ }
}
Statistics.InitializeTimer();
- NativeInterface.RegisterThread(context, _memory);
+ NativeInterface.RegisterThread(context, _memory, this);
do
{
@@ -98,7 +118,7 @@ namespace ARMeilleure.Translation
return nextAddr;
}
- private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
+ internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
{
// TODO: Investigate how we should handle code at unaligned addresses.
// Currently, those low bits are used to store special flags.
@@ -124,12 +144,12 @@ namespace ARMeilleure.Translation
private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
{
- ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User);
+ ArmEmitterContext context = new ArmEmitterContext(_memory, _jumpTable, (long)address, highCq, Aarch32Mode.User);
Logger.StartPass(PassName.Decoding);
- Block[] blocks = highCq
- ? Decoder.DecodeFunction (_memory, address, mode)
+ Block[] blocks = AlwaysTranslateFunctions
+ ? Decoder.DecodeFunction (_memory, address, mode, highCq)
: Decoder.DecodeBasicBlock(_memory, address, mode);
Logger.EndPass(PassName.Decoding);
@@ -216,7 +236,7 @@ namespace ARMeilleure.Translation
// with some kind of branch).
if (isLastOp && block.Next == null)
{
- context.Return(Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
+ InstEmitFlowHelper.EmitTailContinue(context, Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
}
}
}
@@ -238,7 +258,11 @@ namespace ARMeilleure.Translation
context.BranchIfTrue(lblNonZero, count);
- context.Call(new _Void(NativeInterface.CheckSynchronization));
+ Operand running = context.Call(new _Bool(NativeInterface.CheckSynchronization));
+
+ context.BranchIfTrue(lblExit, running);
+
+ context.Return(Const(0L));
context.Branch(lblExit);
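
For reference, the thread-count heuristic in the Execute hunk above works out as follows for a few assumed logical core counts (C# integer division truncates toward zero):

using System;

class HeuristicDemo
{
    static int TranslatorThreads(int logicalCores)
    {
        int unbounded = Math.Max(1, (logicalCores - 6) / 3);
        return Math.Min(3, unbounded);
    }

    static void Main()
    {
        foreach (int cores in new[] { 4, 8, 12, 16, 32 })
        {
            Console.WriteLine($"{cores} logical cores -> {TranslatorThreads(cores)} background translator thread(s)");
        }
        // Prints: 4 -> 1, 8 -> 1, 12 -> 2, 16 -> 3, 32 -> 3
    }
}
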
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
index 1a213b92..c4161d54 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
@@ -137,7 +137,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
public void ExitThread(KThread thread)
{
- thread.Context.Running = false;
+ thread.Context.StopRunning();
CoreManager.Exit(thread.HostThread);
}
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
index 53eb5bdc..cd60c955 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
@@ -1141,9 +1141,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
{
Owner.Translator.Execute(Context, entrypoint);
- Context.Dispose();
-
ThreadExit();
+
+ Context.Dispose();
}
private void ThreadExit()