aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author gdkchan <gab.dark.100@gmail.com> 2019-02-27 23:03:31 -0300
committer jduncanator <1518948+jduncanator@users.noreply.github.com> 2019-02-28 13:03:31 +1100
commit e21ebbf666f10d39d44a0856e5a44143d3d69d0d (patch)
tree 40d25d600ed121eeb397ff24ac7d7d7112b0a079
parent 884b4e5fd3c2a54ebb796b7f995c0eda9c4d0038 (diff)
Misc. CPU optimizations (#575)
* Add optimizations related to caller/callee saved registers, thread synchronization and disable tier 0 * Refactoring * Add a config entry to enable or disable the reg load/store opt. * Remove unnecessary register state stores for calls when the callee is known * Rename IoType to VarType * Enable tier 0 while fixing some perf issues related to tier 0 * Small tweak -- Compile before adding to the cache, to avoid lags * Add required config entry
-rw-r--r--ChocolArm64/Instructions/InstEmitFlow.cs3
-rw-r--r--ChocolArm64/Instructions/InstEmitFlow32.cs1
-rw-r--r--ChocolArm64/Instructions/InstEmitFlowHelper.cs26
-rw-r--r--ChocolArm64/Optimizations.cs32
-rw-r--r--ChocolArm64/Translation/CallType.cs9
-rw-r--r--ChocolArm64/Translation/ILBlock.cs32
-rw-r--r--ChocolArm64/Translation/ILEmitterCtx.cs89
-rw-r--r--ChocolArm64/Translation/ILLabel.cs6
-rw-r--r--ChocolArm64/Translation/ILMethodBuilder.cs45
-rw-r--r--ChocolArm64/Translation/ILOpCode.cs6
-rw-r--r--ChocolArm64/Translation/ILOpCodeBranch.cs10
-rw-r--r--ChocolArm64/Translation/ILOpCodeCall.cs4
-rw-r--r--ChocolArm64/Translation/ILOpCodeConst.cs2
-rw-r--r--ChocolArm64/Translation/ILOpCodeLoad.cs20
-rw-r--r--ChocolArm64/Translation/ILOpCodeLoadField.cs2
-rw-r--r--ChocolArm64/Translation/ILOpCodeLoadState.cs17
-rw-r--r--ChocolArm64/Translation/ILOpCodeLog.cs6
-rw-r--r--ChocolArm64/Translation/ILOpCodeStore.cs20
-rw-r--r--ChocolArm64/Translation/ILOpCodeStoreState.cs26
-rw-r--r--ChocolArm64/Translation/RegisterUsage.cs (renamed from ChocolArm64/Translation/LocalAlloc.cs)189
-rw-r--r--ChocolArm64/Translation/TranslatedSub.cs55
-rw-r--r--ChocolArm64/Translation/Translator.cs56
-rw-r--r--ChocolArm64/Translation/TranslatorQueue.cs14
-rw-r--r--ChocolArm64/Translation/TranslatorQueueItem.cs15
-rw-r--r--ChocolArm64/Translation/VarType.cs (renamed from ChocolArm64/Translation/IoType.cs)2
-rw-r--r--Ryujinx/Config.jsonc17
-rw-r--r--Ryujinx/Configuration.cs10
-rw-r--r--Ryujinx/_schema.json12
28 files changed, 451 insertions, 275 deletions
diff --git a/ChocolArm64/Instructions/InstEmitFlow.cs b/ChocolArm64/Instructions/InstEmitFlow.cs
index a842dca9..5eae89cc 100644
--- a/ChocolArm64/Instructions/InstEmitFlow.cs
+++ b/ChocolArm64/Instructions/InstEmitFlow.cs
@@ -39,7 +39,6 @@ namespace ChocolArm64.Instructions
context.EmitLdc_I(op.Position + 4);
context.EmitStint(RegisterAlias.Lr);
- context.EmitStoreState();
EmitCall(context, op.Imm);
}
@@ -60,6 +59,8 @@ namespace ChocolArm64.Instructions
{
OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;
+ context.HasIndirectJump = true;
+
context.EmitStoreState();
context.EmitLdintzr(op.Rn);
diff --git a/ChocolArm64/Instructions/InstEmitFlow32.cs b/ChocolArm64/Instructions/InstEmitFlow32.cs
index 61f1d34c..dea490c7 100644
--- a/ChocolArm64/Instructions/InstEmitFlow32.cs
+++ b/ChocolArm64/Instructions/InstEmitFlow32.cs
@@ -65,7 +65,6 @@ namespace ChocolArm64.Instructions
}
context.EmitStint(GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr));
- context.EmitStoreState();
//If x is true, then this is a branch with link and exchange.
//In this case we need to swap the mode between Arm <-> Thumb.
diff --git a/ChocolArm64/Instructions/InstEmitFlowHelper.cs b/ChocolArm64/Instructions/InstEmitFlowHelper.cs
index e93ef426..a6091a57 100644
--- a/ChocolArm64/Instructions/InstEmitFlowHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitFlowHelper.cs
@@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions
{
if (context.Tier == TranslationTier.Tier0)
{
+ context.EmitStoreState();
+
context.TranslateAhead(imm);
context.EmitLdc_I8(imm);
@@ -22,6 +24,10 @@ namespace ChocolArm64.Instructions
if (!context.TryOptEmitSubroutineCall())
{
+ context.HasSlowCall = true;
+
+ context.EmitStoreState();
+
context.TranslateAhead(imm);
context.EmitLdarg(TranslatedSub.StateArgIdx);
@@ -32,6 +38,7 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdc_I8(imm);
+ context.EmitLdc_I4((int)CallType.Call);
context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
@@ -58,20 +65,6 @@ namespace ChocolArm64.Instructions
{
if (context.Tier == TranslationTier.Tier0)
{
- context.Emit(OpCodes.Dup);
-
- context.EmitSttmp();
- context.EmitLdarg(TranslatedSub.StateArgIdx);
-
- context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator),
- BindingFlags.Instance |
- BindingFlags.NonPublic));
-
- context.EmitLdarg(TranslatedSub.StateArgIdx);
- context.EmitLdtmp();
-
- context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine));
-
context.Emit(OpCodes.Ret);
}
else
@@ -85,8 +78,11 @@ namespace ChocolArm64.Instructions
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp();
+ context.EmitLdc_I4(isJump
+ ? (int)CallType.VirtualJump
+ : (int)CallType.VirtualCall);
- context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine));
+ context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine));
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs
index 8fa6f462..cbb8131f 100644
--- a/ChocolArm64/Optimizations.cs
+++ b/ChocolArm64/Optimizations.cs
@@ -2,21 +2,23 @@ using System.Runtime.Intrinsics.X86;
public static class Optimizations
{
- internal static bool FastFP = true;
+ public static bool AssumeStrictAbiCompliance { get; set; }
- private static bool _useAllSseIfAvailable = true;
+ public static bool FastFP { get; set; } = true;
- private static bool _useSseIfAvailable = true;
- private static bool _useSse2IfAvailable = true;
- private static bool _useSse3IfAvailable = true;
- private static bool _useSsse3IfAvailable = true;
- private static bool _useSse41IfAvailable = true;
- private static bool _useSse42IfAvailable = true;
+ private const bool UseAllSseIfAvailable = true;
- internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported;
- internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported;
- internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported;
- internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported;
- internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported;
- internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported;
-}
+ public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
+
+ internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported;
+ internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
+ internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported;
+ internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
+ internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
+ internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
+} \ No newline at end of file
diff --git a/ChocolArm64/Translation/CallType.cs b/ChocolArm64/Translation/CallType.cs
new file mode 100644
index 00000000..937ede76
--- /dev/null
+++ b/ChocolArm64/Translation/CallType.cs
@@ -0,0 +1,9 @@
+namespace ChocolArm64.Translation
+{
+ enum CallType
+ {
+ Call,
+ VirtualCall,
+ VirtualJump
+ }
+} \ No newline at end of file
diff --git a/ChocolArm64/Translation/ILBlock.cs b/ChocolArm64/Translation/ILBlock.cs
index 13657901..12773705 100644
--- a/ChocolArm64/Translation/ILBlock.cs
+++ b/ChocolArm64/Translation/ILBlock.cs
@@ -4,13 +4,13 @@ namespace ChocolArm64.Translation
{
class ILBlock : IILEmit
{
- public long IntInputs { get; private set; }
- public long IntOutputs { get; private set; }
- public long IntAwOutputs { get; private set; }
+ public long IntInputs { get; private set; }
+ public long IntOutputs { get; private set; }
+ private long _intAwOutputs;
- public long VecInputs { get; private set; }
- public long VecOutputs { get; private set; }
- public long VecAwOutputs { get; private set; }
+ public long VecInputs { get; private set; }
+ public long VecOutputs { get; private set; }
+ private long _vecAwOutputs;
public bool HasStateStore { get; private set; }
@@ -34,25 +34,25 @@ namespace ChocolArm64.Translation
//opcodes emitted by each ARM instruction.
//We can only consider the new outputs for doing input elimination
//after all the CIL opcodes used by the instruction being emitted.
- IntAwOutputs = IntOutputs;
- VecAwOutputs = VecOutputs;
+ _intAwOutputs = IntOutputs;
+ _vecAwOutputs = VecOutputs;
}
else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index))
{
- switch (ld.IoType)
+ switch (ld.VarType)
{
- case IoType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break;
- case IoType.Int: IntInputs |= (1L << ld.Index) & ~IntAwOutputs; break;
- case IoType.Vector: VecInputs |= (1L << ld.Index) & ~VecAwOutputs; break;
+ case VarType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break;
+ case VarType.Int: IntInputs |= (1L << ld.Index) & ~_intAwOutputs; break;
+ case VarType.Vector: VecInputs |= (1L << ld.Index) & ~_vecAwOutputs; break;
}
}
else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index))
{
- switch (st.IoType)
+ switch (st.VarType)
{
- case IoType.Flag: IntOutputs |= (1L << st.Index) << 32; break;
- case IoType.Int: IntOutputs |= 1L << st.Index; break;
- case IoType.Vector: VecOutputs |= 1L << st.Index; break;
+ case VarType.Flag: IntOutputs |= (1L << st.Index) << 32; break;
+ case VarType.Int: IntOutputs |= 1L << st.Index; break;
+ case VarType.Vector: VecOutputs |= 1L << st.Index; break;
}
}
else if (emitter is ILOpCodeStoreState)
diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs
index f7e61bc9..91b72b13 100644
--- a/ChocolArm64/Translation/ILEmitterCtx.cs
+++ b/ChocolArm64/Translation/ILEmitterCtx.cs
@@ -31,6 +31,10 @@ namespace ChocolArm64.Translation
public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO
+ public bool HasIndirectJump { get; set; }
+
+ public bool HasSlowCall { get; set; }
+
private Dictionary<Block, ILBlock> _visitedBlocks;
private Queue<Block> _branchTargets;
@@ -91,7 +95,12 @@ namespace ChocolArm64.Translation
ResetBlockState();
- AdvanceOpCode();
+ if (AdvanceOpCode())
+ {
+ EmitSynchronization();
+
+ _ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true));
+ }
}
public static int GetIntTempIndex()
@@ -127,10 +136,18 @@ namespace ChocolArm64.Translation
return;
}
- if (_opcIndex == 0)
+ int opcIndex = _opcIndex;
+
+ if (opcIndex == 0)
{
MarkLabel(GetLabel(_currBlock.Position));
+ }
+
+ bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1;
+ if (isLastOp && CurrBlock.Branch != null &&
+ (ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position)
+ {
EmitSynchronization();
}
@@ -161,7 +178,7 @@ namespace ChocolArm64.Translation
//of the next instruction to be executed (in the case that the condition
//is false, and the branch was not taken, as all basic blocks should end with
//some kind of branch).
- if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null)
+ if (isLastOp && CurrBlock.Next == null)
{
EmitStoreState();
EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes);
@@ -285,32 +302,43 @@ namespace ChocolArm64.Translation
return;
}
- _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1));
+ _queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true);
}
public bool TryOptEmitSubroutineCall()
{
+ //Calls should always have a next block, unless
+ //we're translating a single basic block.
if (_currBlock.Next == null)
{
return false;
}
- if (CurrOp.Emitter != InstEmit.Bl)
+ if (!(CurrOp is IOpCodeBImm op))
+ {
+ return false;
+ }
+
+ if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
{
return false;
}
- if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub subroutine))
+ //It's not worth calling a Tier0 method, because
+ //it contains slow code, rather than the entire function.
+ if (sub.Tier == TranslationTier.Tier0)
{
return false;
}
+ EmitStoreState(sub);
+
for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++)
{
EmitLdarg(index);
}
- EmitCall(subroutine.Method);
+ EmitCall(sub.Method);
return true;
}
@@ -321,8 +349,8 @@ namespace ChocolArm64.Translation
InstEmitAluHelper.EmitAluLoadOpers(this);
- Stloc(CmpOptTmp2Index, IoType.Int);
- Stloc(CmpOptTmp1Index, IoType.Int);
+ Stloc(CmpOptTmp2Index, VarType.Int);
+ Stloc(CmpOptTmp1Index, VarType.Int);
}
private Dictionary<Condition, OpCode> _branchOps = new Dictionary<Condition, OpCode>()
@@ -346,8 +374,8 @@ namespace ChocolArm64.Translation
{
if (_optOpLastCompare.Emitter == InstEmit.Subs)
{
- Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize);
- Ldloc(CmpOptTmp2Index, IoType.Int, _optOpLastCompare.RegisterSize);
+ Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
+ Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize);
Emit(_branchOps[cond], target);
@@ -369,7 +397,7 @@ namespace ChocolArm64.Translation
//Such invalid values can't be encoded on the immediate encodings.
if (_optOpLastCompare is IOpCodeAluImm64 op)
{
- Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize);
+ Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
if (_optOpLastCompare.RegisterSize == RegisterSize.Int32)
{
@@ -491,14 +519,14 @@ namespace ChocolArm64.Translation
{
if (amount > 0)
{
- Stloc(RorTmpIndex, IoType.Int);
- Ldloc(RorTmpIndex, IoType.Int);
+ Stloc(RorTmpIndex, VarType.Int);
+ Ldloc(RorTmpIndex, VarType.Int);
EmitLdc_I4(amount);
Emit(OpCodes.Shr_Un);
- Ldloc(RorTmpIndex, IoType.Int);
+ Ldloc(RorTmpIndex, VarType.Int);
EmitLdc_I4(CurrOp.GetBitsCount() - amount);
@@ -546,7 +574,7 @@ namespace ChocolArm64.Translation
public void EmitLdarg(int index)
{
- _ilBlock.Add(new ILOpCodeLoad(index, IoType.Arg));
+ _ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg));
}
public void EmitLdintzr(int index)
@@ -588,6 +616,11 @@ namespace ChocolArm64.Translation
_ilBlock.Add(new ILOpCodeStoreState(_ilBlock));
}
+ private void EmitStoreState(TranslatedSub callSub)
+ {
+ _ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub));
+ }
+
public void EmitLdtmp() => EmitLdint(IntGpTmp1Index);
public void EmitSttmp() => EmitStint(IntGpTmp1Index);
@@ -600,13 +633,13 @@ namespace ChocolArm64.Translation
public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index);
public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index);
- public void EmitLdint(int index) => Ldloc(index, IoType.Int);
- public void EmitStint(int index) => Stloc(index, IoType.Int);
+ public void EmitLdint(int index) => Ldloc(index, VarType.Int);
+ public void EmitStint(int index) => Stloc(index, VarType.Int);
- public void EmitLdvec(int index) => Ldloc(index, IoType.Vector);
- public void EmitStvec(int index) => Stloc(index, IoType.Vector);
+ public void EmitLdvec(int index) => Ldloc(index, VarType.Vector);
+ public void EmitStvec(int index) => Stloc(index, VarType.Vector);
- public void EmitLdflg(int index) => Ldloc(index, IoType.Flag);
+ public void EmitLdflg(int index) => Ldloc(index, VarType.Flag);
public void EmitStflg(int index)
{
//Set this only if any of the NZCV flag bits were modified.
@@ -619,22 +652,22 @@ namespace ChocolArm64.Translation
_optOpLastFlagSet = CurrOp;
}
- Stloc(index, IoType.Flag);
+ Stloc(index, VarType.Flag);
}
- private void Ldloc(int index, IoType ioType)
+ private void Ldloc(int index, VarType varType)
{
- _ilBlock.Add(new ILOpCodeLoad(index, ioType, CurrOp.RegisterSize));
+ _ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize));
}
- private void Ldloc(int index, IoType ioType, RegisterSize registerSize)
+ private void Ldloc(int index, VarType varType, RegisterSize registerSize)
{
- _ilBlock.Add(new ILOpCodeLoad(index, ioType, registerSize));
+ _ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize));
}
- private void Stloc(int index, IoType ioType)
+ private void Stloc(int index, VarType varType)
{
- _ilBlock.Add(new ILOpCodeStore(index, ioType, CurrOp.RegisterSize));
+ _ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize));
}
public void EmitCallPropGet(Type objType, string propName)
diff --git a/ChocolArm64/Translation/ILLabel.cs b/ChocolArm64/Translation/ILLabel.cs
index f423a425..17a31783 100644
--- a/ChocolArm64/Translation/ILLabel.cs
+++ b/ChocolArm64/Translation/ILLabel.cs
@@ -6,7 +6,7 @@ namespace ChocolArm64.Translation
{
private bool _hasLabel;
- private Label _lbl;
+ private Label _label;
public void Emit(ILMethodBuilder context)
{
@@ -17,12 +17,12 @@ namespace ChocolArm64.Translation
{
if (!_hasLabel)
{
- _lbl = context.Generator.DefineLabel();
+ _label = context.Generator.DefineLabel();
_hasLabel = true;
}
- return _lbl;
+ return _label;
}
}
} \ No newline at end of file
diff --git a/ChocolArm64/Translation/ILMethodBuilder.cs b/ChocolArm64/Translation/ILMethodBuilder.cs
index 892f831b..98b50520 100644
--- a/ChocolArm64/Translation/ILMethodBuilder.cs
+++ b/ChocolArm64/Translation/ILMethodBuilder.cs
@@ -8,7 +8,10 @@ namespace ChocolArm64.Translation
{
class ILMethodBuilder
{
- public LocalAlloc LocalAlloc { get; private set; }
+ private const int RegsCount = 32;
+ private const int RegsMask = RegsCount - 1;
+
+ public RegisterUsage RegUsage { get; private set; }
public ILGenerator Generator { get; private set; }
@@ -18,29 +21,47 @@ namespace ChocolArm64.Translation
private string _subName;
+ public bool IsAarch64 { get; }
+
+ public bool IsSubComplete { get; }
+
private int _localsCount;
- public ILMethodBuilder(ILBlock[] ilBlocks, string subName)
+ public ILMethodBuilder(
+ ILBlock[] ilBlocks,
+ string subName,
+ bool isAarch64,
+ bool isSubComplete = false)
{
- _ilBlocks = ilBlocks;
- _subName = subName;
+ _ilBlocks = ilBlocks;
+ _subName = subName;
+ IsAarch64 = isAarch64;
+ IsSubComplete = isSubComplete;
}
- public TranslatedSub GetSubroutine(TranslationTier tier)
+ public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
{
- LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]);
+ RegUsage = new RegisterUsage();
+
+ RegUsage.BuildUses(_ilBlocks[0]);
DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);
- Generator = method.GetILGenerator();
+ long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
+ long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);
- TranslatedSub subroutine = new TranslatedSub(method, tier);
+ TranslatedSub subroutine = new TranslatedSub(
+ method,
+ intNiRegsMask,
+ vecNiRegsMask,
+ tier,
+ isWorthOptimizing);
_locals = new Dictionary<Register, int>();
_localsCount = 0;
- new ILOpCodeLoadState(_ilBlocks[0]).Emit(this);
+ Generator = method.GetILGenerator();
foreach (ILBlock ilBlock in _ilBlocks)
{
@@ -80,13 +101,13 @@ namespace ChocolArm64.Translation
public static Register GetRegFromBit(int bit, RegisterType baseType)
{
- if (bit < 32)
+ if (bit < RegsCount)
{
return new Register(bit, baseType);
}
else if (baseType == RegisterType.Int)
{
- return new Register(bit & 0x1f, RegisterType.Flag);
+ return new Register(bit & RegsMask, RegisterType.Flag);
}
else
{
@@ -96,7 +117,7 @@ namespace ChocolArm64.Translation
public static bool IsRegIndex(int index)
{
- return (uint)index < 32;
+ return (uint)index < RegsCount;
}
}
} \ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCode.cs b/ChocolArm64/Translation/ILOpCode.cs
index 4021603c..48645282 100644
--- a/ChocolArm64/Translation/ILOpCode.cs
+++ b/ChocolArm64/Translation/ILOpCode.cs
@@ -4,16 +4,16 @@ namespace ChocolArm64.Translation
{
struct ILOpCode : IILEmit
{
- private OpCode _ilOp;
+ public OpCode ILOp { get; }
public ILOpCode(OpCode ilOp)
{
- _ilOp = ilOp;
+ ILOp = ilOp;
}
public void Emit(ILMethodBuilder context)
{
- context.Generator.Emit(_ilOp);
+ context.Generator.Emit(ILOp);
}
}
} \ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeBranch.cs b/ChocolArm64/Translation/ILOpCodeBranch.cs
index 22b80b5d..9d4e40fa 100644
--- a/ChocolArm64/Translation/ILOpCodeBranch.cs
+++ b/ChocolArm64/Translation/ILOpCodeBranch.cs
@@ -4,18 +4,18 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeBranch : IILEmit
{
- private OpCode _ilOp;
- private ILLabel _label;
+ public OpCode ILOp { get; }
+ public ILLabel Label { get; }
public ILOpCodeBranch(OpCode ilOp, ILLabel label)
{
- _ilOp = ilOp;
- _label = label;
+ ILOp = ilOp;
+ Label = label;
}
public void Emit(ILMethodBuilder context)
{
- context.Generator.Emit(_ilOp, _label.GetLabel(context));
+ context.Generator.Emit(ILOp, Label.GetLabel(context));
}
}
} \ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeCall.cs b/ChocolArm64/Translation/ILOpCodeCall.cs
index c046aeeb..dc20417a 100644
--- a/ChocolArm64/Translation/ILOpCodeCall.cs
+++ b/ChocolArm64/Translation/ILOpCodeCall.cs
@@ -5,9 +5,9 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeCall : IILEmit
{
- public MethodInfo Info { get; private set; }
+ public MethodInfo Info { get; }
- public bool IsVirtual { get; private set; }
+ public bool IsVirtual { get; }
public ILOpCodeCall(MethodInfo info, bool isVirtual)
{
diff --git a/ChocolArm64/Translation/ILOpCodeConst.cs b/ChocolArm64/Translation/ILOpCodeConst.cs
index 2aaf8676..cd3b58ff 100644
--- a/ChocolArm64/Translation/ILOpCodeConst.cs
+++ b/ChocolArm64/Translation/ILOpCodeConst.cs
@@ -16,6 +16,8 @@ namespace ChocolArm64.Translation
private ImmVal _value;
+ public long Value => _value.I8;
+
private enum ConstType
{
Int32,
diff --git a/ChocolArm64/Translation/ILOpCodeLoad.cs b/ChocolArm64/Translation/ILOpCodeLoad.cs
index c31e06bb..0d11eeaa 100644
--- a/ChocolArm64/Translation/ILOpCodeLoad.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoad.cs
@@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLoad : IILEmit
{
- public int Index { get; private set; }
+ public int Index { get; }
- public IoType IoType { get; private set; }
+ public VarType VarType { get; }
- public RegisterSize RegisterSize { get; private set; }
+ public RegisterSize RegisterSize { get; }
- public ILOpCodeLoad(int index, IoType ioType, RegisterSize registerSize = 0)
+ public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0)
{
Index = index;
- IoType = ioType;
+ VarType = varType;
RegisterSize = registerSize;
}
public void Emit(ILMethodBuilder context)
{
- switch (IoType)
+ switch (VarType)
{
- case IoType.Arg: context.Generator.EmitLdarg(Index); break;
+ case VarType.Arg: context.Generator.EmitLdarg(Index); break;
- case IoType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break;
- case IoType.Int: EmitLdloc(context, Index, RegisterType.Int); break;
- case IoType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
+ case VarType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break;
+ case VarType.Int: EmitLdloc(context, Index, RegisterType.Int); break;
+ case VarType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break;
}
}
diff --git a/ChocolArm64/Translation/ILOpCodeLoadField.cs b/ChocolArm64/Translation/ILOpCodeLoadField.cs
index abcd37c3..f0507ac2 100644
--- a/ChocolArm64/Translation/ILOpCodeLoadField.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoadField.cs
@@ -5,7 +5,7 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLoadField : IILEmit
{
- public FieldInfo Info { get; private set; }
+ public FieldInfo Info { get; }
public ILOpCodeLoadField(FieldInfo info)
{
diff --git a/ChocolArm64/Translation/ILOpCodeLoadState.cs b/ChocolArm64/Translation/ILOpCodeLoadState.cs
index ddab6110..c23dc943 100644
--- a/ChocolArm64/Translation/ILOpCodeLoadState.cs
+++ b/ChocolArm64/Translation/ILOpCodeLoadState.cs
@@ -7,15 +7,24 @@ namespace ChocolArm64.Translation
{
private ILBlock _block;
- public ILOpCodeLoadState(ILBlock block)
+ private bool _isSubEntry;
+
+ public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false)
{
- _block = block;
+ _block = block;
+ _isSubEntry = isSubEntry;
}
public void Emit(ILMethodBuilder context)
{
- long intInputs = context.LocalAlloc.GetIntInputs(_block);
- long vecInputs = context.LocalAlloc.GetVecInputs(_block);
+ long intInputs = context.RegUsage.GetIntInputs(_block);
+ long vecInputs = context.RegUsage.GetVecInputs(_block);
+
+ if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
+ {
+ intInputs = RegisterUsage.ClearCallerSavedIntRegs(intInputs, context.IsAarch64);
+ vecInputs = RegisterUsage.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64);
+ }
LoadLocals(context, intInputs, RegisterType.Int);
LoadLocals(context, vecInputs, RegisterType.Vector);
diff --git a/ChocolArm64/Translation/ILOpCodeLog.cs b/ChocolArm64/Translation/ILOpCodeLog.cs
index ebb042b5..53846f92 100644
--- a/ChocolArm64/Translation/ILOpCodeLog.cs
+++ b/ChocolArm64/Translation/ILOpCodeLog.cs
@@ -2,16 +2,16 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeLog : IILEmit
{
- private string _text;
+ public string Text { get; }
public ILOpCodeLog(string text)
{
- _text = text;
+ Text = text;
}
public void Emit(ILMethodBuilder context)
{
- context.Generator.EmitWriteLine(_text);
+ context.Generator.EmitWriteLine(Text);
}
}
} \ No newline at end of file
diff --git a/ChocolArm64/Translation/ILOpCodeStore.cs b/ChocolArm64/Translation/ILOpCodeStore.cs
index 17a6259c..7ac78e9a 100644
--- a/ChocolArm64/Translation/ILOpCodeStore.cs
+++ b/ChocolArm64/Translation/ILOpCodeStore.cs
@@ -5,28 +5,28 @@ namespace ChocolArm64.Translation
{
struct ILOpCodeStore : IILEmit
{
- public int Index { get; private set; }
+ public int Index { get; }
- public IoType IoType { get; private set; }
+ public VarType VarType { get; }
- public RegisterSize RegisterSize { get; private set; }
+ public RegisterSize RegisterSize { get; }
- public ILOpCodeStore(int index, IoType ioType, RegisterSize registerSize = 0)
+ public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0)
{
Index = index;
- IoType = ioType;
+ VarType = varType;
RegisterSize = registerSize;
}
public void Emit(ILMethodBuilder context)
{
- switch (IoType)
+ switch (VarType)
{
- case IoType.Arg: context.Generator.EmitStarg(Index); break;
+ case VarType.Arg: context.Generator.EmitStarg(Index); break;
- case IoType.Flag: EmitStloc(context, Index, RegisterType.Flag); break;
- case IoType.Int: EmitStloc(context, Index, RegisterType.Int); break;
- case IoType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
+ case VarType.Flag: EmitStloc(context, Index, RegisterType.Flag); break;
+ case VarType.Int: EmitStloc(context, Index, RegisterType.Int); break;
+ case VarType.Vector: EmitStloc(context, Index, RegisterType.Vector); break;
}
}
diff --git a/ChocolArm64/Translation/ILOpCodeStoreState.cs b/ChocolArm64/Translation/ILOpCodeStoreState.cs
index 458e9eda..a587dbfe 100644
--- a/ChocolArm64/Translation/ILOpCodeStoreState.cs
+++ b/ChocolArm64/Translation/ILOpCodeStoreState.cs
@@ -7,15 +7,33 @@ namespace ChocolArm64.Translation
{
private ILBlock _block;
- public ILOpCodeStoreState(ILBlock block)
+ private TranslatedSub _callSub;
+
+ public ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null)
{
- _block = block;
+ _block = block;
+ _callSub = callSub;
}
public void Emit(ILMethodBuilder context)
{
- long intOutputs = context.LocalAlloc.GetIntOutputs(_block);
- long vecOutputs = context.LocalAlloc.GetVecOutputs(_block);
+ long intOutputs = context.RegUsage.GetIntOutputs(_block);
+ long vecOutputs = context.RegUsage.GetVecOutputs(_block);
+
+ if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
+ {
+ intOutputs = RegisterUsage.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64);
+ vecOutputs = RegisterUsage.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64);
+ }
+
+ if (_callSub != null)
+ {
+ //Those registers are assigned in the callee function, without
+ //reading their values first. We don't need to write them because
+ //they are not going to be read by the callee.
+ intOutputs &= ~_callSub.IntNiRegsMask;
+ vecOutputs &= ~_callSub.VecNiRegsMask;
+ }
StoreLocals(context, intOutputs, RegisterType.Int);
StoreLocals(context, vecOutputs, RegisterType.Vector);
diff --git a/ChocolArm64/Translation/LocalAlloc.cs b/ChocolArm64/Translation/RegisterUsage.cs
index 763be619..2e6829d5 100644
--- a/ChocolArm64/Translation/LocalAlloc.cs
+++ b/ChocolArm64/Translation/RegisterUsage.cs
@@ -3,8 +3,13 @@ using System.Collections.Generic;
namespace ChocolArm64.Translation
{
- class LocalAlloc
+ class RegisterUsage
{
+ public const long CallerSavedIntRegistersMask = 0x7fL << 9;
+ public const long PStateNzcvFlagsMask = 0xfL << 60;
+
+ public const long CallerSavedVecRegistersMask = 0xffffL << 16;
+
private class PathIo
{
private Dictionary<ILBlock, long> _allInputs;
@@ -18,31 +23,30 @@ namespace ChocolArm64.Translation
_cmnOutputs = new Dictionary<ILBlock, long>();
}
- public PathIo(ILBlock root, long inputs, long outputs) : this()
- {
- Set(root, inputs, outputs);
- }
-
- public void Set(ILBlock root, long inputs, long outputs)
+ public void Set(ILBlock entry, long inputs, long outputs)
{
- if (!_allInputs.TryAdd(root, inputs))
+ if (!_allInputs.TryAdd(entry, inputs))
{
- _allInputs[root] |= inputs;
+ _allInputs[entry] |= inputs;
}
- if (!_cmnOutputs.TryAdd(root, outputs))
+ if (!_cmnOutputs.TryAdd(entry, outputs))
{
- _cmnOutputs[root] &= outputs;
+ _cmnOutputs[entry] &= outputs;
}
_allOutputs |= outputs;
}
- public long GetInputs(ILBlock root)
+ public long GetInputs(ILBlock entry)
{
- if (_allInputs.TryGetValue(root, out long inputs))
+ if (_allInputs.TryGetValue(entry, out long inputs))
{
- return inputs | (_allOutputs & ~_cmnOutputs[root]);
+ //We also need to read the registers that may not be written
+ //by all paths that can reach an exit point, to ensure that
+ //the local variable will not remain uninitialized depending
+ //on the flow path taken.
+ return inputs | (_allOutputs & ~_cmnOutputs[entry]);
}
return 0;
@@ -57,15 +61,38 @@ namespace ChocolArm64.Translation
private Dictionary<ILBlock, PathIo> _intPaths;
private Dictionary<ILBlock, PathIo> _vecPaths;
- private struct BlockIo
+ private struct BlockIo : IEquatable<BlockIo>
{
- public ILBlock Block;
- public ILBlock Entry;
+ public ILBlock Block { get; }
+ public ILBlock Entry { get; }
+
+ public long IntInputs { get; set; }
+ public long VecInputs { get; set; }
+ public long IntOutputs { get; set; }
+ public long VecOutputs { get; set; }
- public long IntInputs;
- public long VecInputs;
- public long IntOutputs;
- public long VecOutputs;
+ public BlockIo(ILBlock block, ILBlock entry)
+ {
+ Block = block;
+ Entry = entry;
+
+ IntInputs = IntOutputs = 0;
+ VecInputs = VecOutputs = 0;
+ }
+
+ public BlockIo(
+ ILBlock block,
+ ILBlock entry,
+ long intInputs,
+ long vecInputs,
+ long intOutputs,
+ long vecOutputs) : this(block, entry)
+ {
+ IntInputs = intInputs;
+ VecInputs = vecInputs;
+ IntOutputs = intOutputs;
+ VecOutputs = vecOutputs;
+ }
public override bool Equals(object obj)
{
@@ -74,6 +101,11 @@ namespace ChocolArm64.Translation
return false;
}
+ return Equals(other);
+ }
+
+ public bool Equals(BlockIo other)
+ {
return other.Block == Block &&
other.Entry == Entry &&
other.IntInputs == IntInputs &&
@@ -98,25 +130,13 @@ namespace ChocolArm64.Translation
}
}
- private const int MaxOptGraphLength = 40;
-
- public LocalAlloc(ILBlock[] graph, ILBlock entry)
+ public RegisterUsage()
{
_intPaths = new Dictionary<ILBlock, PathIo>();
_vecPaths = new Dictionary<ILBlock, PathIo>();
-
- if (graph.Length > 1 &&
- graph.Length < MaxOptGraphLength)
- {
- InitializeOptimal(graph, entry);
- }
- else
- {
- InitializeFast(graph);
- }
}
- private void InitializeOptimal(ILBlock[] graph, ILBlock entry)
+ public void BuildUses(ILBlock entry)
{
//This will go through all possible paths on the graph,
//and store all inputs/outputs for each block. A register
@@ -124,7 +144,7 @@ namespace ChocolArm64.Translation
//When a block can be reached by more than one path, then the
//output from all paths needs to be set for this block, and
//only outputs present in all of the parent blocks can be considered
- //when doing input elimination. Each block chain have a entry, that's where
+ //when doing input elimination. Each block chain has an entry, that's where
//the code starts executing. They are present on the subroutine start point,
//and on call return points too (address written to X30 by BL).
HashSet<BlockIo> visited = new HashSet<BlockIo>();
@@ -133,19 +153,13 @@ namespace ChocolArm64.Translation
void Enqueue(BlockIo block)
{
- if (!visited.Contains(block))
+ if (visited.Add(block))
{
unvisited.Enqueue(block);
-
- visited.Add(block);
}
}
- Enqueue(new BlockIo()
- {
- Block = entry,
- Entry = entry
- });
+ Enqueue(new BlockIo(entry, entry));
while (unvisited.Count > 0)
{
@@ -177,19 +191,21 @@ namespace ChocolArm64.Translation
void EnqueueFromCurrent(ILBlock block, bool retTarget)
{
- BlockIo blockIo = new BlockIo() { Block = block };
+ BlockIo blockIo;
if (retTarget)
{
- blockIo.Entry = block;
+ blockIo = new BlockIo(block, block);
}
else
{
- blockIo.Entry = current.Entry;
- blockIo.IntInputs = current.IntInputs;
- blockIo.VecInputs = current.VecInputs;
- blockIo.IntOutputs = current.IntOutputs;
- blockIo.VecOutputs = current.VecOutputs;
+ blockIo = new BlockIo(
+ block,
+ current.Entry,
+ current.IntInputs,
+ current.VecInputs,
+ current.IntOutputs,
+ current.VecOutputs);
}
Enqueue(blockIo);
@@ -207,54 +223,63 @@ namespace ChocolArm64.Translation
}
}
- private void InitializeFast(ILBlock[] graph)
- {
- //This is WAY faster than InitializeOptimal, but results in
- //unneeded loads and stores, so the resulting code will be slower.
- long intInputs = 0, intOutputs = 0;
- long vecInputs = 0, vecOutputs = 0;
+ public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values);
+ public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values);
- foreach (ILBlock block in graph)
- {
- intInputs |= block.IntInputs;
- intOutputs |= block.IntOutputs;
- vecInputs |= block.VecInputs;
- vecOutputs |= block.VecOutputs;
- }
+ private long GetInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
+ {
+ long inputs = 0;
- //It's possible that not all code paths writes to those output registers,
- //in those cases if we attempt to write an output registers that was
- //not written, we will be just writing zero and messing up the old register value.
- //So we just need to ensure that all outputs are loaded.
- if (graph.Length > 1)
+ foreach (PathIo path in values)
{
- intInputs |= intOutputs;
- vecInputs |= vecOutputs;
+ inputs |= path.GetInputs(entry);
}
- foreach (ILBlock block in graph)
- {
- _intPaths.Add(block, new PathIo(block, intInputs, intOutputs));
- _vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs));
- }
+ return inputs;
}
- public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values);
- public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values);
+ public long GetIntNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _intPaths.Values);
+ public long GetVecNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _vecPaths.Values);
- private long GetInputsImpl(ILBlock root, IEnumerable<PathIo> values)
+ private long GetNotInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
{
- long inputs = 0;
+ //Returns a mask with registers that are written to
+ //before being read. Only those registers that are
+ //written in all paths, and are not read before being
+ //written to on those paths, should be set on the mask.
+ long mask = -1L;
foreach (PathIo path in values)
{
- inputs |= path.GetInputs(root);
+ mask &= path.GetOutputs() & ~path.GetInputs(entry);
}
- return inputs;
+ return mask;
}
public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs();
public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs();
+
+ public static long ClearCallerSavedIntRegs(long mask, bool isAarch64)
+ {
+ //TODO: ARM32 support.
+ if (isAarch64)
+ {
+ mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
+ }
+
+ return mask;
+ }
+
+ public static long ClearCallerSavedVecRegs(long mask, bool isAarch64)
+ {
+ //TODO: ARM32 support.
+ if (isAarch64)
+ {
+ mask &= ~CallerSavedVecRegistersMask;
+ }
+
+ return mask;
+ }
}
} \ No newline at end of file
diff --git a/ChocolArm64/Translation/TranslatedSub.cs b/ChocolArm64/Translation/TranslatedSub.cs
index 65d70351..8b599b7a 100644
--- a/ChocolArm64/Translation/TranslatedSub.cs
+++ b/ChocolArm64/Translation/TranslatedSub.cs
@@ -10,21 +10,41 @@ namespace ChocolArm64.Translation
class TranslatedSub
{
+ //This is the minimum number of calls needed for the method
+ //to be retranslated with higher quality code. It's only worth
+ //doing that for hot code.
+ private const int MinCallCountForOpt = 30;
+
public ArmSubroutine Delegate { get; private set; }
- public static int StateArgIdx { get; private set; }
- public static int MemoryArgIdx { get; private set; }
+ public static int StateArgIdx { get; }
+ public static int MemoryArgIdx { get; }
+
+ public static Type[] FixedArgTypes { get; }
+
+ public DynamicMethod Method { get; }
+
+ public TranslationTier Tier { get; }
- public static Type[] FixedArgTypes { get; private set; }
+ public long IntNiRegsMask { get; }
+ public long VecNiRegsMask { get; }
- public DynamicMethod Method { get; private set; }
+ private bool _isWorthOptimizing;
- public TranslationTier Tier { get; private set; }
+ private int _callCount;
- public TranslatedSub(DynamicMethod method, TranslationTier tier)
+ public TranslatedSub(
+ DynamicMethod method,
+ long intNiRegsMask,
+ long vecNiRegsMask,
+ TranslationTier tier,
+ bool isWorthOptimizing)
{
- Method = method ?? throw new ArgumentNullException(nameof(method));;
- Tier = tier;
+ Method = method ?? throw new ArgumentNullException(nameof(method));;
+ IntNiRegsMask = intNiRegsMask;
+ VecNiRegsMask = vecNiRegsMask;
+ _isWorthOptimizing = isWorthOptimizing;
+ Tier = tier;
}
static TranslatedSub()
@@ -61,5 +81,24 @@ namespace ChocolArm64.Translation
{
return Delegate(threadState, memory);
}
+
+ public bool IsWorthOptimizing()
+ {
+ if (!_isWorthOptimizing)
+ {
+ return false;
+ }
+
+ if (_callCount++ < MinCallCountForOpt)
+ {
+ return false;
+ }
+
+ //Only return true once, so that it is
+ //added to the queue only once.
+ _isWorthOptimizing = false;
+
+ return true;
+ }
}
} \ No newline at end of file
diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs
index dd1215f5..bda0bca0 100644
--- a/ChocolArm64/Translation/Translator.cs
+++ b/ChocolArm64/Translation/Translator.cs
@@ -63,46 +63,34 @@ namespace ChocolArm64.Translation
CpuTrace?.Invoke(this, new CpuTraceEventArgs(position));
}
- TranslatedSub subroutine = GetOrTranslateSubroutine(state, position);
+ if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
+ {
+ sub = TranslateLowCq(position, state.GetExecutionMode());
+ }
- position = subroutine.Execute(state, _memory);
+ position = sub.Execute(state, _memory);
}
while (position != 0 && state.Running);
state.CurrentTranslator = null;
}
- internal void TranslateVirtualSubroutine(CpuThreadState state, long position)
- {
- if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0)
- {
- _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
- }
- }
-
- internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position)
+ internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
{
if (!_cache.TryGetSubroutine(position, out TranslatedSub sub))
{
sub = TranslateLowCq(position, state.GetExecutionMode());
}
- if (sub.Tier == TranslationTier.Tier0)
+ if (sub.IsWorthOptimizing())
{
- _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1));
- }
-
- return sub.Delegate;
- }
+ bool isComplete = cs == CallType.Call ||
+ cs == CallType.VirtualCall;
- internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position)
- {
- if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine))
- {
- subroutine = TranslateLowCq(position, state.GetExecutionMode());
+ _queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete);
}
- return subroutine;
+ return sub.Delegate;
}
private void TranslateQueuedSubs()
@@ -124,7 +112,7 @@ namespace ChocolArm64.Translation
}
else
{
- TranslateHighCq(item.Position, item.Mode);
+ TranslateHighCq(item.Position, item.Mode, item.IsComplete);
}
}
else
@@ -142,14 +130,16 @@ namespace ChocolArm64.Translation
string subName = GetSubroutineName(position);
- ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName);
+ bool isAarch64 = mode == ExecutionMode.Aarch64;
+
+ ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
- TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0);
+ TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
}
- private void TranslateHighCq(long position, ExecutionMode mode)
+ private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete)
{
Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
@@ -159,9 +149,13 @@ namespace ChocolArm64.Translation
string subName = GetSubroutineName(position);
- ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName);
+ bool isAarch64 = mode == ExecutionMode.Aarch64;
+
+ isComplete &= !context.HasIndirectJump;
- TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1);
+ ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
+
+ TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
int ilOpCount = 0;
@@ -170,9 +164,11 @@ namespace ChocolArm64.Translation
ilOpCount += ilBlock.Count;
}
+ ForceAheadOfTimeCompilation(subroutine);
+
_cache.AddOrUpdate(position, subroutine, ilOpCount);
- ForceAheadOfTimeCompilation(subroutine);
+ return subroutine;
}
private string GetSubroutineName(long position)
diff --git a/ChocolArm64/Translation/TranslatorQueue.cs b/ChocolArm64/Translation/TranslatorQueue.cs
index 89d665bf..0f1d8474 100644
--- a/ChocolArm64/Translation/TranslatorQueue.cs
+++ b/ChocolArm64/Translation/TranslatorQueue.cs
@@ -1,3 +1,4 @@
+using ChocolArm64.State;
using System.Collections.Concurrent;
using System.Threading;
@@ -5,10 +6,6 @@ namespace ChocolArm64.Translation
{
class TranslatorQueue
{
- //This is the maximum number of functions to be translated that the queue can hold.
- //The value may need some tuning to find the sweet spot.
- private const int MaxQueueSize = 1024;
-
private ConcurrentStack<TranslatorQueueItem>[] _translationQueue;
private ManualResetEvent _queueDataReceivedEvent;
@@ -27,14 +24,11 @@ namespace ChocolArm64.Translation
_queueDataReceivedEvent = new ManualResetEvent(false);
}
- public void Enqueue(TranslatorQueueItem item)
+ public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete)
{
- ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier];
+ TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete);
- if (queue.Count >= MaxQueueSize)
- {
- queue.TryPop(out _);
- }
+ ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier];
queue.Push(item);
diff --git a/ChocolArm64/Translation/TranslatorQueueItem.cs b/ChocolArm64/Translation/TranslatorQueueItem.cs
index 0988414a..dde2706d 100644
--- a/ChocolArm64/Translation/TranslatorQueueItem.cs
+++ b/ChocolArm64/Translation/TranslatorQueueItem.cs
@@ -10,11 +10,18 @@ namespace ChocolArm64.Translation
public TranslationTier Tier { get; }
- public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier)
+ public bool IsComplete { get; }
+
+ public TranslatorQueueItem(
+ long position,
+ ExecutionMode mode,
+ TranslationTier tier,
+ bool isComplete = false)
{
- Position = position;
- Mode = mode;
- Tier = tier;
+ Position = position;
+ Mode = mode;
+ Tier = tier;
+ IsComplete = isComplete;
}
}
} \ No newline at end of file
diff --git a/ChocolArm64/Translation/IoType.cs b/ChocolArm64/Translation/VarType.cs
index c7710e0c..d671575e 100644
--- a/ChocolArm64/Translation/IoType.cs
+++ b/ChocolArm64/Translation/VarType.cs
@@ -1,6 +1,6 @@
namespace ChocolArm64.Translation
{
- enum IoType
+ enum VarType
{
Arg,
Flag,
diff --git a/Ryujinx/Config.jsonc b/Ryujinx/Config.jsonc
index 8b5ebe03..6e808b56 100644
--- a/Ryujinx/Config.jsonc
+++ b/Ryujinx/Config.jsonc
@@ -29,18 +29,21 @@
// System Language list: https://gist.github.com/HorrorTroll/b6e4a88d774c3c9b3bdf54d79a7ca43b
"system_language": "AmericanEnglish",
- // Enable or Disable Docked Mode
+ // Enable or disable Docked Mode
"docked_mode": false,
-
- // Enable or Disable Game Vsync
+
+ // Enable or disable Game Vsync
"enable_vsync": true,
-
- // Enable or Disable Multi-core scheduling of threads
+
+ // Enable or disable Multi-core scheduling of threads
"enable_multicore_scheduling": true,
-
+
// Enable integrity checks on Switch content files
"enable_fs_integrity_checks": true,
-
+
+ // Enable or disable aggressive CPU optimizations
+ "enable_aggressive_cpu_opts": true,
+
// The primary controller's type
// Supported Values: Handheld, ProController, NpadPair, NpadLeft, NpadRight
"controller_type": "Handheld",
diff --git a/Ryujinx/Configuration.cs b/Ryujinx/Configuration.cs
index dbbec1cb..c4a1b436 100644
--- a/Ryujinx/Configuration.cs
+++ b/Ryujinx/Configuration.cs
@@ -87,6 +87,11 @@ namespace Ryujinx
public bool EnableFsIntegrityChecks { get; private set; }
/// <summary>
+ /// Enable or disable aggressive CPU optimizations
+ /// </summary>
+ public bool EnableAggressiveCpuOpts { get; private set; }
+
+ /// <summary>
/// The primary controller's type
/// </summary>
public HidControllerType ControllerType { get; private set; }
@@ -197,6 +202,11 @@ namespace Ryujinx
? IntegrityCheckLevel.ErrorOnInvalid
: IntegrityCheckLevel.None;
+ if (Instance.EnableAggressiveCpuOpts)
+ {
+ Optimizations.AssumeStrictAbiCompliance = true;
+ }
+
if(Instance.GamepadControls.Enabled)
{
if (GamePad.GetName(Instance.GamepadControls.Index) == "Unmapped Controller")
diff --git a/Ryujinx/_schema.json b/Ryujinx/_schema.json
index 0e586671..7e7e4665 100644
--- a/Ryujinx/_schema.json
+++ b/Ryujinx/_schema.json
@@ -17,6 +17,7 @@
"enable_vsync",
"enable_multicore_scheduling",
"enable_fs_integrity_checks",
+ "enable_aggressive_cpu_opts",
"controller_type",
"keyboard_controls",
"gamepad_controls"
@@ -399,6 +400,17 @@
false
]
},
+ "enable_aggressive_cpu_opts": {
+ "$id": "#/properties/enable_aggressive_cpu_opts",
+ "type": "boolean",
+ "title": "Enable Aggressive CPU Optimizations",
+ "description": "Enable or disable aggressive CPU optimizations",
+ "default": true,
+ "examples": [
+ true,
+ false
+ ]
+ },
"controller_type": {
"$id": "#/properties/controller_type",
"type": "string",