Diffstat (limited to 'src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs')
-rw-r--r--  src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs | 340
1 file changed, 340 insertions, 0 deletions
diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs b/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs
new file mode 100644
index 00000000..a7107499
--- /dev/null
+++ b/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs
@@ -0,0 +1,340 @@
+using ARMeilleure.Memory;
+using Ryujinx.Common;
+using Ryujinx.Memory;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace Ryujinx.Cpu.LightningJit.Cache
+{
+ class NoWxCache : IDisposable
+ {
+ private const int CodeAlignment = 4; // Bytes.
+ private const int SharedCacheSize = 2047 * 1024 * 1024;
+ private const int LocalCacheSize = 128 * 1024 * 1024;
+
+ // How many calls to the same function we allow until we pad the shared cache to force the function to become available there
+ // and allow the guest to take the fast path.
+ private const int MinCallsForPad = 8;
+
+ private class MemoryCache : IDisposable
+ {
+ private readonly ReservedRegion _region;
+ private readonly CacheMemoryAllocator _cacheAllocator;
+
+ public CacheMemoryAllocator Allocator => _cacheAllocator;
+ public IntPtr Pointer => _region.Block.Pointer;
+
+ public MemoryCache(IJitMemoryAllocator allocator, ulong size)
+ {
+ _region = new(allocator, size);
+ _cacheAllocator = new((int)size);
+ }
+
+ public int Allocate(int codeSize)
+ {
+ codeSize = AlignCodeSize(codeSize);
+
+ int allocOffset = _cacheAllocator.Allocate(codeSize);
+
+ if (allocOffset < 0)
+ {
+ throw new OutOfMemoryException("JIT Cache exhausted.");
+ }
+
+ _region.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);
+
+ return allocOffset;
+ }
+
+ public void Free(int offset, int size)
+ {
+ _cacheAllocator.Free(offset, size);
+ }
+
+ public void ReprotectAsRw(int offset, int size)
+ {
+ Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
+ Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
+
+ _region.Block.MapAsRw((ulong)offset, (ulong)size);
+ }
+
+ public void ReprotectAsRx(int offset, int size)
+ {
+ Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
+ Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
+
+ _region.Block.MapAsRx((ulong)offset, (ulong)size);
+
+ if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
+ {
+ JitSupportDarwin.SysIcacheInvalidate(_region.Block.Pointer + offset, size);
+ }
+ else
+ {
+ throw new PlatformNotSupportedException();
+ }
+ }
+
+ private static int AlignCodeSize(int codeSize)
+ {
+ return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ _region.Dispose();
+ _cacheAllocator.Clear();
+ }
+ }
+
+ public void Dispose()
+ {
+ // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
+ Dispose(disposing: true);
+ GC.SuppressFinalize(this);
+ }
+ }
+
+ private readonly IStackWalker _stackWalker;
+ private readonly Translator _translator;
+ private readonly MemoryCache _sharedCache;
+ private readonly MemoryCache _localCache;
+ private readonly PageAlignedRangeList _pendingMap;
+ private readonly object _lock;
+
+ class ThreadLocalCacheEntry
+ {
+ public readonly int Offset;
+ public readonly int Size;
+ public readonly IntPtr FuncPtr;
+ private int _useCount;
+
+ public ThreadLocalCacheEntry(int offset, int size, IntPtr funcPtr)
+ {
+ Offset = offset;
+ Size = size;
+ FuncPtr = funcPtr;
+ _useCount = 0;
+ }
+
+ public int IncrementUseCount()
+ {
+ return ++_useCount;
+ }
+ }
+
+ [ThreadStatic]
+ private static Dictionary<ulong, ThreadLocalCacheEntry> _threadLocalCache;
+
+ public NoWxCache(IJitMemoryAllocator allocator, IStackWalker stackWalker, Translator translator)
+ {
+ _stackWalker = stackWalker;
+ _translator = translator;
+ _sharedCache = new(allocator, SharedCacheSize);
+ _localCache = new(allocator, LocalCacheSize);
+ _pendingMap = new(_sharedCache.ReprotectAsRx, RegisterFunction);
+ _lock = new();
+ }
+
+ public unsafe IntPtr Map(IntPtr framePointer, ReadOnlySpan<byte> code, ulong guestAddress, ulong guestSize)
+ {
+ if (TryGetThreadLocalFunction(guestAddress, out IntPtr funcPtr))
+ {
+ return funcPtr;
+ }
+
+ lock (_lock)
+ {
+ if (!_pendingMap.Has(guestAddress) && !_translator.Functions.ContainsKey(guestAddress))
+ {
+ int funcOffset = _sharedCache.Allocate(code.Length);
+
+ funcPtr = _sharedCache.Pointer + funcOffset;
+ code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
+
+ TranslatedFunction function = new(funcPtr, guestSize);
+
+ _pendingMap.Add(funcOffset, code.Length, guestAddress, function);
+ }
+
+ ClearThreadLocalCache(framePointer);
+
+ return AddThreadLocalFunction(code, guestAddress);
+ }
+ }
+
+ public unsafe IntPtr MapPageAligned(ReadOnlySpan<byte> code)
+ {
+ lock (_lock)
+ {
+ // Ensure we will get an aligned offset from the allocator.
+ _pendingMap.Pad(_sharedCache.Allocator);
+
+ int sizeAligned = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
+ int funcOffset = _sharedCache.Allocate(sizeAligned);
+
+ Debug.Assert((funcOffset & ((int)MemoryBlock.GetPageSize() - 1)) == 0);
+
+ IntPtr funcPtr = _sharedCache.Pointer + funcOffset;
+ code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
+
+ _sharedCache.ReprotectAsRx(funcOffset, sizeAligned);
+
+ return funcPtr;
+ }
+ }
+
+ private bool TryGetThreadLocalFunction(ulong guestAddress, out IntPtr funcPtr)
+ {
+ if ((_threadLocalCache ??= new()).TryGetValue(guestAddress, out var entry))
+ {
+ if (entry.IncrementUseCount() >= MinCallsForPad)
+ {
+ // Function is being called often, let's make it available in the shared cache so that the guest code
+ // can take the fast path and stop calling the emulator to get the function from the thread local cache.
+ // To do that we pad all "pending" functions until they complete a page of memory, allowing us to reprotect them as RX.
+
+ lock (_lock)
+ {
+ _pendingMap.Pad(_sharedCache.Allocator);
+ }
+ }
+
+ funcPtr = entry.FuncPtr;
+
+ return true;
+ }
+
+ funcPtr = IntPtr.Zero;
+
+ return false;
+ }
+
+ private void ClearThreadLocalCache(IntPtr framePointer)
+ {
+ // Try to delete functions that are already on the shared cache
+ // and no longer being executed.
+
+ if (_threadLocalCache == null)
+ {
+ return;
+ }
+
+ IEnumerable<ulong> callStack = _stackWalker.GetCallStack(
+ framePointer,
+ _localCache.Pointer,
+ LocalCacheSize,
+ _sharedCache.Pointer,
+ SharedCacheSize);
+
+ List<(ulong, ThreadLocalCacheEntry)> toDelete = new();
+
+ foreach ((ulong address, ThreadLocalCacheEntry entry) in _threadLocalCache)
+ {
+ // We only want to delete if the function is already on the shared cache,
+ // otherwise we will keep translating the same function over and over again.
+ bool canDelete = !_pendingMap.Has(address);
+ if (!canDelete)
+ {
+ continue;
+ }
+
+ // We can only delete if the function is not part of the current thread call stack,
+ // otherwise we will crash the program when the thread returns to it.
+ foreach (ulong funcAddress in callStack)
+ {
+ if (funcAddress >= (ulong)entry.FuncPtr && funcAddress < (ulong)entry.FuncPtr + (ulong)entry.Size)
+ {
+ canDelete = false;
+ break;
+ }
+ }
+
+ if (canDelete)
+ {
+ toDelete.Add((address, entry));
+ }
+ }
+
+ int pageSize = (int)MemoryBlock.GetPageSize();
+
+ foreach ((ulong address, ThreadLocalCacheEntry entry) in toDelete)
+ {
+ _threadLocalCache.Remove(address);
+
+ int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);
+
+ _localCache.Free(entry.Offset, sizeAligned);
+ _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
+ }
+ }
+
+ public void ClearEntireThreadLocalCache()
+ {
+ // Thread is exiting, delete everything.
+
+ if (_threadLocalCache == null)
+ {
+ return;
+ }
+
+ int pageSize = (int)MemoryBlock.GetPageSize();
+
+ foreach ((_, ThreadLocalCacheEntry entry) in _threadLocalCache)
+ {
+ int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);
+
+ _localCache.Free(entry.Offset, sizeAligned);
+ _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
+ }
+
+ _threadLocalCache.Clear();
+ _threadLocalCache = null;
+ }
+
+ private unsafe IntPtr AddThreadLocalFunction(ReadOnlySpan<byte> code, ulong guestAddress)
+ {
+ int alignedSize = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
+ int funcOffset = _localCache.Allocate(alignedSize);
+
+ Debug.Assert((funcOffset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
+
+ IntPtr funcPtr = _localCache.Pointer + funcOffset;
+ code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
+
+ (_threadLocalCache ??= new()).Add(guestAddress, new(funcOffset, code.Length, funcPtr));
+
+ _localCache.ReprotectAsRx(funcOffset, alignedSize);
+
+ return funcPtr;
+ }
+
+ private void RegisterFunction(ulong address, TranslatedFunction func)
+ {
+ TranslatedFunction oldFunc = _translator.Functions.GetOrAdd(address, func.GuestSize, func);
+
+ Debug.Assert(oldFunc == func);
+
+ _translator.RegisterFunction(address, func);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ _localCache.Dispose();
+ _sharedCache.Dispose();
+ }
+ }
+
+ public void Dispose()
+ {
+ Dispose(disposing: true);
+ GC.SuppressFinalize(this);
+ }
+ }
+}
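
For context, the page-padding step referenced in the comments above (calling Pad on the pending map before reprotecting) boils down to rounding the tail of the pending range up to the next page boundary, so that a whole number of pages can be flipped from RW to RX. The stand-alone sketch below models only that arithmetic, using hypothetical names (PendingRangeSketch, PageSize); it is not part of the Ryujinx sources and performs no actual memory protection changes.

    using System;
    using System.Collections.Generic;

    // Hypothetical illustration only: models the bookkeeping, not real reprotect/icache calls.
    static class PendingRangeSketch
    {
        private const int PageSize = 0x1000; // Assumed 4 KiB pages for the example.

        // Same rounding the cache uses (BitUtils.AlignUp / AlignCodeSize).
        private static int AlignUp(int value, int alignment)
        {
            return (value + (alignment - 1)) & ~(alignment - 1);
        }

        public static void Main()
        {
            // Offsets and sizes of functions already copied into the shared cache (still RW).
            List<(int Offset, int Size)> pending = new() { (0, 0x230), (0x230, 0x1A0) };

            int start = pending[0].Offset;                   // Page aligned by construction.
            int end = pending[^1].Offset + pending[^1].Size; // May end mid-page.

            // Pad the tail to the next page boundary; protection changes apply to whole pages,
            // so this is what makes the pending functions eligible to become RX (executable).
            int paddedEnd = AlignUp(end, PageSize);
            int padding = paddedEnd - end;

            Console.WriteLine($"Pending range [0x{start:X}, 0x{end:X}) needs 0x{padding:X} bytes of padding;");
            Console.WriteLine($"reprotect [0x{start:X}, 0x{paddedEnd:X}) as RX and register the functions.");
        }
    }

In the real cache the padding presumably comes out of the shared allocator and is never reclaimed, which is why it is only forced once a function has been called MinCallsForPad times, avoiding wasted cache space on rarely used functions.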