From 1be668e68a1937f2af239e2707ab914286018892 Mon Sep 17 00:00:00 2001
From: riperiperi <rhy3756547@hotmail.com>
Date: Thu, 30 Nov 2023 10:39:42 -0800
Subject: HLE: Add OS-specific precise sleep methods to reduce spinwaiting
 (#5948)

* feat: add nanosleep for linux and macos

* Add Windows 0.5ms sleep

- Imprecise waits for longer waits with clock alignment
- 1/4 the spin time on vsync timer

* Remove old experiment

* Fix event leak

* Tweaking for MacOS

* Linux tweaks, nanosleep vsync improvement

* Fix overbias

* Cleanup

* Fix realignment

* Add some docs and some cleanup

NanosleepPool needs more docs; Nanosleep has some benchmark code that needs to be removed.

* Rename "Microsleep" to "PreciseSleep"

It might have been confused with "microseconds", a unit in which no measurement is performed.

* Remove nanosleep measurement

* Remove unused debug logging

* Nanosleep Pool Documentation

* More cleanup

* Whitespace

* Formatting

* Address Feedback

* Allow SleepUntilTimePoint to take EventWaitHandle

* Remove `_chrono` stopwatch in SurfaceFlinger

* Move spinwaiting logic to PreciseSleepHelper

Technically, these achieve different things, but having them here makes them easier to reuse or tune.
---
 .../PreciseSleep/IPreciseSleepEvent.cs             |  38 ++++
 src/Ryujinx.Common/PreciseSleep/Nanosleep.cs       | 160 +++++++++++++++
 src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs  |  84 ++++++++
 src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs   | 228 +++++++++++++++++++++
 .../PreciseSleep/PreciseSleepHelper.cs             | 104 ++++++++++
 src/Ryujinx.Common/PreciseSleep/SleepEvent.cs      |  51 +++++
 .../PreciseSleep/WindowsGranularTimer.cs           | 220 ++++++++++++++++++++
 .../PreciseSleep/WindowsSleepEvent.cs              |  92 +++++++++
 src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs  |  39 +---
 .../HOS/Services/SurfaceFlinger/SurfaceFlinger.cs  |  33 ++-
 10 files changed, 1000 insertions(+), 49 deletions(-)
 create mode 100644 src/Ryujinx.Common/PreciseSleep/IPreciseSleepEvent.cs
 create mode 100644 src/Ryujinx.Common/PreciseSleep/Nanosleep.cs
 create mode 100644 src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs
 create mode 100644 src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs
 create mode 100644 src/Ryujinx.Common/PreciseSleep/PreciseSleepHelper.cs
 create mode 100644 src/Ryujinx.Common/PreciseSleep/SleepEvent.cs
 create mode 100644 src/Ryujinx.Common/PreciseSleep/WindowsGranularTimer.cs
 create mode 100644 src/Ryujinx.Common/PreciseSleep/WindowsSleepEvent.cs

diff --git a/src/Ryujinx.Common/PreciseSleep/IPreciseSleepEvent.cs b/src/Ryujinx.Common/PreciseSleep/IPreciseSleepEvent.cs
new file mode 100644
index 00000000..26b5ab68
--- /dev/null
+++ b/src/Ryujinx.Common/PreciseSleep/IPreciseSleepEvent.cs
@@ -0,0 +1,38 @@
+using System;
+
+namespace Ryujinx.Common.PreciseSleep
+{
+    /// <summary>
+    /// An event which works similarly to an AutoResetEvent, but is backed by a
+    /// more precise timer that allows waits of less than a millisecond.
+    /// </summary>
+    public interface IPreciseSleepEvent : IDisposable
+    {
+        /// <summary>
+        /// Adjust a timepoint to better fit the host clock.
+        /// When no adjustment is made, the input timepoint will be returned.
+        /// </summary>
+        /// <param name="timePoint">Timepoint to adjust, in PerformanceCounter ticks</param>
+        /// <param name="timeoutNs">Requested timeout in nanoseconds</param>
+        /// <returns>Adjusted timepoint</returns>
+        long AdjustTimePoint(long timePoint, long timeoutNs);
+
+        /// <summary>
+        /// Sleep until a timepoint is reached, or until a signal is received.
+        /// Given no signal, may wake considerably before, or slightly after the timeout.
+        /// </summary>
+        /// <param name="timePoint">Timepoint to sleep until, in PerformanceCounter ticks</param>
+        /// <returns>True if signalled or waited, false if a wait could not be performed</returns>
+        bool SleepUntil(long timePoint);
+
+        /// <summary>
+        /// Sleep until a signal is received, with no timeout.
+        /// </summary>
+        void Sleep();
+
+        /// <summary>
+        /// Signal the event, waking any sleeping thread or the next attempted sleep.
+        /// </summary>
+        void Signal();
+    }
+}
diff --git a/src/Ryujinx.Common/PreciseSleep/Nanosleep.cs b/src/Ryujinx.Common/PreciseSleep/Nanosleep.cs
new file mode 100644
index 00000000..67f067ae
--- /dev/null
+++ b/src/Ryujinx.Common/PreciseSleep/Nanosleep.cs
@@ -0,0 +1,160 @@
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Common.PreciseSleep
+{
+    /// <summary>
+    /// Access to Linux/MacOS nanosleep, with platform specific bias to improve precision.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    [SupportedOSPlatform("linux")]
+    [SupportedOSPlatform("android")]
+    [SupportedOSPlatform("ios")]
+    internal static partial class Nanosleep
+    {
+        private const long LinuxBaseNanosleepBias = 50000; // 0.05ms
+
+        // Penalty for max allowed sleep duration
+        private const long LinuxNanosleepAccuracyPenaltyThreshold = 200000; // 0.2ms
+        private const long LinuxNanosleepAccuracyPenalty = 30000; // 0.03ms
+
+        // Penalty for base sleep duration
+        private const long LinuxNanosleepBasePenaltyThreshold = 500000; // 0.5ms
+        private const long LinuxNanosleepBasePenalty = 30000; // 0.03ms
+        private const long LinuxNanosleepPenaltyPerMillisecond = 18000; // 0.018ms
+        private const long LinuxNanosleepPenaltyCap = 18000; // 0.018ms
+
+        private const long LinuxStrictBiasOffset = 150_000; // 0.15ms
+
+        // Nanosleep duration is biased depending on the requested timeout on MacOS.
+        // These match the results when measuring on an M1 processor at AboveNormal priority.
+        private const long MacosBaseNanosleepBias = 5000; // 0.005ms
+        private const long MacosBiasPerMillisecond = 140000; // 0.14ms
+        private const long MacosBiasMaxNanoseconds = 20_000_000; // 20ms
+        private const long MacosStrictBiasOffset = 150_000; // 0.15ms
+
+        public static long Bias { get; }
+
+        /// <summary>
+        /// Get the bias that sleeps subtract from a given nanosecond timeout.
+        /// MacOS and iOS scale it with the timeout; other platforms add a capped penalty above a threshold.
+        /// </summary>
+        /// <param name="timeoutNs">Nanosecond timeout</param>
+        /// <returns>Bias in nanoseconds</returns>
+        public static long GetBias(long timeoutNs)
+        {
+            if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
+            {
+                // Timeouts beyond the maximum do not increase the bias any further.
+                long scaledNs = Math.Min(timeoutNs, MacosBiasMaxNanoseconds);
+                return MacosBaseNanosleepBias + scaledNs * MacosBiasPerMillisecond / 1_000_000;
+            }
+
+            if (timeoutNs <= LinuxNanosleepBasePenaltyThreshold)
+            {
+                return LinuxBaseNanosleepBias;
+            }
+
+            // Flat penalty, plus a capped amount proportional to the time over the threshold.
+            long overNs = timeoutNs - LinuxNanosleepBasePenaltyThreshold;
+            long scaledPenalty = Math.Min(LinuxNanosleepPenaltyCap, overNs * LinuxNanosleepPenaltyPerMillisecond / 1_000_000);
+
+            return LinuxBaseNanosleepBias + LinuxNanosleepBasePenalty + scaledPenalty;
+        }
+
+        /// <summary>
+        /// Get a larger bias than <see cref="GetBias"/> for a given nanosecond timeout,
+        /// which can improve the chances the sleep completes before the timeout.
+        /// </summary>
+        /// <param name="timeoutNs">Nanosecond timeout</param>
+        /// <returns>Strict bias in nanoseconds</returns>
+        public static long GetStrictBias(long timeoutNs)
+        {
+            long baseBias = GetBias(timeoutNs);
+
+            if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
+            {
+                return baseBias + MacosStrictBiasOffset;
+            }
+
+            // Other platforms: timeouts above the accuracy threshold get an extra fixed penalty.
+            long accuracyPenalty = 0;
+
+            if (timeoutNs > LinuxNanosleepAccuracyPenaltyThreshold)
+            {
+                accuracyPenalty = LinuxNanosleepAccuracyPenalty;
+            }
+
+            return baseBias + LinuxStrictBiasOffset + accuracyPenalty;
+        }
+
+        static Nanosleep()
+        {
+            Bias = GetBias(0);
+        }
+
+        [StructLayout(LayoutKind.Sequential)]
+        private struct Timespec
+        {
+            public long tv_sec;  // Seconds
+            public long tv_nsec; // Nanoseconds
+        }
+
+        [LibraryImport("libc", SetLastError = true)]
+        private static partial int nanosleep(ref Timespec req, ref Timespec rem);
+
+        /// <summary>
+        /// Build the timespec that nanosleep expects from a timeout in nanoseconds.
+        /// </summary>
+        /// <param name="nanoseconds">Timeout in nanoseconds</param>
+        /// <returns>Timespec for nanosleep</returns>
+        private static Timespec GetTimespecFromNanoseconds(ulong nanoseconds)
+        {
+            // Split into whole seconds and the sub-second remainder.
+            ulong wholeSeconds = nanoseconds / 1_000_000_000;
+            ulong leftoverNs = nanoseconds % 1_000_000_000;
+
+            return new Timespec { tv_sec = (long)wholeSeconds, tv_nsec = (long)leftoverNs };
+        }
+
+        /// <summary>
+        /// Nanosleep for roughly the requested duration, after subtracting the bias from GetBias.
+        /// </summary>
+        /// <param name="nanoseconds">Time to sleep for in nanoseconds</param>
+        public static void Sleep(long nanoseconds)
+        {
+            long biasedNs = nanoseconds - GetBias(nanoseconds);
+            if (biasedNs < 0)
+            {
+                return; // The bias exceeds the requested duration, so no sleep is issued.
+            }
+
+            Timespec request = GetTimespecFromNanoseconds((ulong)biasedNs);
+            Timespec remaining = new();
+            nanosleep(ref request, ref remaining);
+        }
+
+        /// <summary>
+        /// Nanosleep for at most the requested duration where possible.
+        /// The strict bias is subtracted so the thread tends to wake ahead of the requested duration.
+        /// </summary>
+        /// <remarks>
+        /// Due to OS scheduling behaviour, this timeframe may still be missed.
+        /// </remarks>
+        /// <param name="nanoseconds">Maximum allowed time for sleep</param>
+        public static void SleepAtMost(long nanoseconds)
+        {
+            long biasedNs = nanoseconds - GetStrictBias(nanoseconds);
+            // The strict bias exceeds the requested duration: no sleep is issued.
+            if (biasedNs < 0)
+            {
+                return;
+            }
+
+            Timespec request = GetTimespecFromNanoseconds((ulong)biasedNs);
+            Timespec remaining = new();
+            nanosleep(ref request, ref remaining);
+        }
+    }
+}
diff --git a/src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs b/src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs
new file mode 100644
index 00000000..f54fb09c
--- /dev/null
+++ b/src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs
@@ -0,0 +1,84 @@
+using System;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Common.PreciseSleep
+{
+    /// <summary>
+    /// A precise sleep event for linux and macos that uses nanosleep for more precise timeouts.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    [SupportedOSPlatform("linux")]
+    [SupportedOSPlatform("android")]
+    [SupportedOSPlatform("ios")]
+    internal class NanosleepEvent : IPreciseSleepEvent
+    {
+        private readonly AutoResetEvent _waitEvent = new(false);
+        private readonly NanosleepPool _pool;
+
+        public NanosleepEvent()
+        {
+            _pool = new NanosleepPool(_waitEvent);
+        }
+
+        public long AdjustTimePoint(long timePoint, long timeoutNs)
+        {
+            // No adjustment
+            return timePoint;
+        }
+
+        public bool SleepUntil(long timePoint)
+        {
+            long now = PerformanceCounter.ElapsedTicks;
+            long delta = (timePoint - now);
+            long ms = Math.Min(delta / PerformanceCounter.TicksPerMillisecond, int.MaxValue); // Whole milliseconds remaining, clamped for WaitOne.
+            long ns = (delta * 1_000_000) / PerformanceCounter.TicksPerMillisecond; // Remaining time in nanoseconds.
+
+            if (ms > 0)
+            {
+                _waitEvent.WaitOne((int)ms); // At least 1ms remains: a regular timed wait on the event is used.
+
+                return true;
+            }
+            else if (ns - Nanosleep.Bias > 0)
+            {
+                // Don't bother starting a sleep if there's already a signal active.
+                if (_waitEvent.WaitOne(0))
+                {
+                    return true;
+                }
+
+                // Ask the pool to signal the event after a nanosleep; the 1ms wait below is the upper bound if it does not.
+                if (!_pool.SleepAndSignal(ns, timePoint))
+                {
+                    // Too many threads on the pool.
+                    return false;
+                }
+                _waitEvent.WaitOne(1);
+                _pool.IgnoreSignal(); // Advance the pool's signal ID so the sleep requested above no longer matches it.
+
+                return true;
+            }
+
+            return false; // Remaining time does not exceed the base nanosleep bias, so no wait was performed.
+        }
+
+        public void Sleep()
+        {
+            _waitEvent.WaitOne();
+        }
+
+        public void Signal()
+        {
+            _waitEvent.Set();
+        }
+
+        public void Dispose()
+        {
+            GC.SuppressFinalize(this);
+
+            _pool.Dispose();
+            _waitEvent.Dispose();
+        }
+    }
+}
diff --git a/src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs b/src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs
new file mode 100644
index 00000000..c0973dcb
--- /dev/null
+++ b/src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs
@@ -0,0 +1,228 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Common.PreciseSleep
+{
+    /// <summary>
+    /// A pool of threads used to allow "interruptable" nanosleep for a single target event.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    [SupportedOSPlatform("linux")]
+    [SupportedOSPlatform("android")]
+    [SupportedOSPlatform("ios")]
+    internal class NanosleepPool : IDisposable
+    {
+        public const int MaxThreads = 8;
+
+        /// <summary>
+        /// A thread that nanosleeps and may signal an event on wake.
+        /// When a thread is assigned a nanosleep to perform, it also gets a signal ID.
+        /// The pool's target event is only signalled if this ID matches the latest dispatched one.
+        /// </summary>
+        private class NanosleepThread : IDisposable
+        {
+            private static readonly long _timePointEpsilon;
+
+            static NanosleepThread()
+            {
+                _timePointEpsilon = PerformanceCounter.TicksPerMillisecond / 100; // 0.01ms
+            }
+
+            private readonly Thread _thread;
+            private readonly NanosleepPool _parent;
+            private readonly AutoResetEvent _newWaitEvent;
+            private bool _running = true;
+
+            private long _signalId;
+            private long _nanoseconds;
+            private long _timePoint;
+
+            public long SignalId => _signalId;
+
+            /// <summary>
+            /// Creates a new NanosleepThread for a parent pool, with a specified thread ID.
+            /// </summary>
+            /// <param name="parent">Parent NanosleepPool</param>
+            /// <param name="id">Thread ID</param>
+            public NanosleepThread(NanosleepPool parent, int id)
+            {
+                _parent = parent;
+                _newWaitEvent = new(false);
+
+                _thread = new Thread(Loop)
+                {
+                    Name = $"Common.Nanosleep.{id}",
+                    Priority = ThreadPriority.AboveNormal,
+                    IsBackground = true
+                };
+
+                _thread.Start();
+            }
+
+            /// <summary>
+            /// Worker loop: wait for a request, nanosleep, signal the parent pool, and repeat until disposed.
+            /// </summary>
+            private void Loop()
+            {
+                _newWaitEvent.WaitOne(); // Block until the first request (or Dispose) sets the event.
+
+                while (_running)
+                {
+                    Nanosleep.Sleep(_nanoseconds);
+
+                    _parent.Signal(this);
+                    _newWaitEvent.WaitOne(); // Wait for the next request, or for Dispose to clear _running.
+                }
+
+                _newWaitEvent.Dispose(); // The event is only disposed here, once the loop has exited.
+            }
+
+            /// <summary>
+            /// Assign a nanosleep for this thread to perform, then signal at the end.
+            /// </summary>
+            /// <param name="nanoseconds">Nanoseconds to sleep</param>
+            /// <param name="signalId">Signal ID</param>
+            /// <param name="timePoint">Target timepoint</param>
+            public void SleepAndSignal(long nanoseconds, long signalId, long timePoint)
+            {
+                _signalId = signalId;
+                _nanoseconds = nanoseconds;
+                _timePoint = timePoint;
+                _newWaitEvent.Set();
+            }
+
+            /// <summary>
+            /// Give this thread's current nanosleep a new signal ID when its target timepoint is within the epsilon.
+            /// </summary>
+            /// <param name="signalId">New signal id to assign the nanosleep</param>
+            /// <param name="timePoint">Target timepoint</param>
+            /// <returns>True if resurrected, false otherwise</returns>
+            public bool Resurrect(long signalId, long timePoint)
+            {
+                long distance = Math.Abs(timePoint - _timePoint);
+                bool closeEnough = distance < _timePointEpsilon;
+                if (closeEnough)
+                {
+                    _signalId = signalId;
+                }
+
+                return closeEnough;
+            }
+
+            /// <summary>
+            /// Dispose the NanosleepThread, interrupting its worker loop.
+            /// </summary>
+            public void Dispose()
+            {
+                if (_running)
+                {
+                    _running = false;
+                    _newWaitEvent.Set();
+                }
+            }
+        }
+
+        private readonly object _lock = new();
+        private readonly List<NanosleepThread> _threads = new();
+        private readonly List<NanosleepThread> _active = new();
+        private readonly Stack<NanosleepThread> _free = new();
+        private readonly AutoResetEvent _signalTarget;
+
+        private long _signalId;
+
+        /// <summary>
+        /// Creates a new NanosleepPool with a target event to signal when a nanosleep completes.
+        /// </summary>
+        /// <param name="signalTarget">Event to signal when nanosleeps complete</param>
+        public NanosleepPool(AutoResetEvent signalTarget)
+        {
+            _signalTarget = signalTarget;
+        }
+
+        /// <summary>
+        /// Signal the target event (if the source sleep has not been superseded)
+        /// and free the nanosleep thread.
+        /// </summary>
+        /// <param name="thread">Nanosleep thread that completed</param>
+        private void Signal(NanosleepThread thread)
+        {
+            lock (_lock)
+            {
+                _active.Remove(thread);
+                _free.Push(thread); // The thread can now be reused by SleepAndSignal.
+
+                if (thread.SignalId == _signalId) // Only a sleep carrying the current signal ID may wake the target.
+                {
+                    _signalTarget.Set();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Sleep for the given number of nanoseconds and signal the target event.
+        /// This does not block the caller thread.
+        /// </summary>
+        /// <param name="nanoseconds">Nanoseconds to sleep</param>
+        /// <param name="timePoint">Target timepoint</param>
+        /// <returns>True if a nanosleep was dispatched or resurrected, false if the pool has no thread available</returns>
+        public bool SleepAndSignal(long nanoseconds, long timePoint)
+        {
+            lock (_lock)
+            {
+                _signalId++; // Every request gets a fresh ID, superseding any earlier one.
+
+                // Check active sleeps, if any line up with the requested timepoint then resurrect that nanosleep.
+                foreach (NanosleepThread existing in _active)
+                {
+                    if (existing.Resurrect(_signalId, timePoint))
+                    {
+                        return true;
+                    }
+                }
+
+                if (!_free.TryPop(out NanosleepThread thread)) // Prefer reusing an idle thread.
+                {
+                    if (_threads.Count >= MaxThreads)
+                    {
+                        return false; // Pool is at MaxThreads and none are free.
+                    }
+
+                    thread = new NanosleepThread(this, _threads.Count);
+
+                    _threads.Add(thread);
+                }
+
+                _active.Add(thread);
+
+                thread.SleepAndSignal(nanoseconds, _signalId, timePoint);
+
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// Ignore the latest nanosleep by advancing the signal ID, so its completion will not match in Signal.
+        /// </summary>
+        public void IgnoreSignal()
+        {
+            _signalId++; // NOTE(review): modified without taking _lock, unlike SleepAndSignal/Signal - confirm this is intended.
+        }
+
+        /// <summary>
+        /// Dispose the NanosleepPool, disposing all of its active threads.
+        /// </summary>
+        public void Dispose()
+        {
+            GC.SuppressFinalize(this);
+
+            foreach (NanosleepThread thread in _threads)
+            {
+                thread.Dispose();
+            }
+
+            _threads.Clear();
+        }
+    }
+}
diff --git a/src/Ryujinx.Common/PreciseSleep/PreciseSleepHelper.cs b/src/Ryujinx.Common/PreciseSleep/PreciseSleepHelper.cs
new file mode 100644
index 00000000..3c30a7f6
--- /dev/null
+++ b/src/Ryujinx.Common/PreciseSleep/PreciseSleepHelper.cs
@@ -0,0 +1,104 @@
+using Ryujinx.Common.SystemInterop;
+using System;
+using System.Threading;
+
+namespace Ryujinx.Common.PreciseSleep
+{
+    public static class PreciseSleepHelper
+    {
+        /// <summary>
+        /// Build the precise sleep event implementation that suits the host operating system.
+        /// </summary>
+        /// <returns>A precise sleep event</returns>
+        public static IPreciseSleepEvent CreateEvent()
+        {
+            if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS() || OperatingSystem.IsIOS() || OperatingSystem.IsAndroid())
+            {
+                return new NanosleepEvent();
+            }
+
+            if (OperatingSystem.IsWindows())
+            {
+                return new WindowsSleepEvent();
+            }
+
+            // Unrecognised platform: fall back to the millisecond-granularity event.
+            return new SleepEvent();
+        }
+
+        /// <summary>
+        /// Sleep as close to the timepoint as the OS reasonably allows, aiming not to overshoot it.
+        /// The supplied event is what the timer uses to wake this thread, so nothing else should signal it.
+        /// </summary>
+        /// <param name="evt">Event used to wake this thread</param>
+        /// <param name="timePoint">Target timepoint in host ticks</param>
+        public static void SleepUntilTimePoint(EventWaitHandle evt, long timePoint)
+        {
+            if (OperatingSystem.IsWindows())
+            {
+                WindowsGranularTimer.Instance.SleepUntilTimePointWithoutExternalSignal(evt, timePoint);
+
+                return;
+            }
+
+            // Event waits can run slightly long depending on the OS, and the timepoint must not be missed.
+            // Pretend the clock is half a millisecond ahead so the millisecond wait comes out shorter;
+            // where nanosleep is available it covers the remainder afterwards.
+            long halfMillisecond = PerformanceCounter.TicksPerMillisecond / 2;
+            long biasedNow = PerformanceCounter.ElapsedTicks + halfMillisecond;
+            long wholeMs = Math.Min((timePoint - biasedNow) / PerformanceCounter.TicksPerMillisecond, int.MaxValue);
+
+            if (wholeMs > 0)
+            {
+                evt.WaitOne((int)wholeMs);
+            }
+
+            if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS() || OperatingSystem.IsIOS() || OperatingSystem.IsAndroid())
+            {
+                // Re-read the clock and nanosleep through whatever time is left.
+                long remainingTicks = timePoint - PerformanceCounter.ElapsedTicks;
+                long remainingNs = (remainingTicks * 1_000_000) / PerformanceCounter.TicksPerMillisecond;
+
+                Nanosleep.SleepAtMost(remainingNs);
+            }
+        }
+
+        /// <summary>
+        /// Spin until the timepoint is reached, or until wakeSignal is observed as 1.
+        /// The thread may yield while spinning.
+        /// </summary>
+        /// <param name="timePoint">Target timepoint in host ticks</param>
+        /// <param name="wakeSignal">Returns early if this is set to 1</param>
+        public static void SpinWaitUntilTimePoint(long timePoint, ref long wakeSignal)
+        {
+            SpinWait spinner = new();
+
+            while (Interlocked.Read(ref wakeSignal) != 1 && PerformanceCounter.ElapsedTicks < timePoint)
+            {
+                if (!spinner.NextSpinWillYield)
+                {
+                    spinner.SpinOnce();
+
+                    continue;
+                }
+
+                // The deadline is near: yield ourselves and restart the spinner,
+                // rather than letting SpinWait escalate to a possible Thread.Sleep().
+                Thread.Yield();
+                spinner.Reset();
+            }
+        }
+
+        /// <summary>
+        /// Spinwait until the given timepoint, with no opportunity to wake early. Returns at once if it has already passed.
+        /// </summary>
+        /// <param name="timePoint">Target timepoint in host ticks</param>
+        public static void SpinWaitUntilTimePoint(long timePoint)
+        {
+            while (PerformanceCounter.ElapsedTicks < timePoint)
+            {
+                Thread.SpinWait(5); // Short busy-wait between clock reads.
+            }
+        }
+    }
+}
diff --git a/src/Ryujinx.Common/PreciseSleep/SleepEvent.cs b/src/Ryujinx.Common/PreciseSleep/SleepEvent.cs
new file mode 100644
index 00000000..f0769d1e
--- /dev/null
+++ b/src/Ryujinx.Common/PreciseSleep/SleepEvent.cs
@@ -0,0 +1,51 @@
+using System;
+using System.Threading;
+
+namespace Ryujinx.Common.PreciseSleep
+{
+    /// <summary>
+    /// A cross-platform precise sleep event that has millisecond granularity.
+    /// </summary>
+    internal class SleepEvent : IPreciseSleepEvent
+    {
+        private readonly AutoResetEvent _waitEvent = new(false);
+
+        public long AdjustTimePoint(long timePoint, long timeoutNs)
+        {
+            // No adjustment
+            return timePoint;
+        }
+
+        public bool SleepUntil(long timePoint)
+        {
+            long remainingTicks = timePoint - PerformanceCounter.ElapsedTicks;
+            long wholeMs = Math.Min(remainingTicks / PerformanceCounter.TicksPerMillisecond, int.MaxValue);
+
+            // Less than a whole millisecond left: this implementation does not wait that briefly.
+            if (wholeMs <= 0)
+            {
+                return false;
+            }
+
+            _waitEvent.WaitOne((int)wholeMs);
+            return true;
+        }
+
+        public void Sleep()
+        {
+            _waitEvent.WaitOne();
+        }
+
+        public void Signal()
+        {
+            _waitEvent.Set();
+        }
+
+        public void Dispose()
+        {
+            GC.SuppressFinalize(this);
+
+            _waitEvent.Dispose();
+        }
+    }
+}
diff --git a/src/Ryujinx.Common/PreciseSleep/WindowsGranularTimer.cs b/src/Ryujinx.Common/PreciseSleep/WindowsGranularTimer.cs
new file mode 100644
index 00000000..a0de1634
--- /dev/null
+++ b/src/Ryujinx.Common/PreciseSleep/WindowsGranularTimer.cs
@@ -0,0 +1,220 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Common.SystemInterop
+{
+    /// <summary>
+    /// Timer that attempts to align with the hardware timer interrupt,
+    /// and can alert listeners on ticks.
+    /// </summary>
+    [SupportedOSPlatform("windows")]
+    internal partial class WindowsGranularTimer
+    {
+        private const int MinimumGranularity = 5000;
+
+        private static readonly WindowsGranularTimer _instance = new();
+        public static WindowsGranularTimer Instance => _instance;
+
+        private readonly struct WaitingObject
+        {
+            public readonly long Id;
+            public readonly EventWaitHandle Signal;
+            public readonly long TimePoint;
+
+            public WaitingObject(long id, EventWaitHandle signal, long timePoint)
+            {
+                Id = id;
+                Signal = signal;
+                TimePoint = timePoint;
+            }
+        }
+
+        [LibraryImport("ntdll.dll", SetLastError = true)]
+        private static partial int NtSetTimerResolution(int DesiredResolution, [MarshalAs(UnmanagedType.Bool)] bool SetResolution, out int CurrentResolution);
+
+        [LibraryImport("ntdll.dll", SetLastError = true)]
+        private static partial int NtQueryTimerResolution(out int MaximumResolution, out int MinimumResolution, out int CurrentResolution);
+
+        [LibraryImport("ntdll.dll", SetLastError = true)]
+        private static partial uint NtDelayExecution([MarshalAs(UnmanagedType.Bool)] bool Alertable, ref long DelayInterval);
+
+        public long GranularityNs => _granularityNs;
+        public long GranularityTicks => _granularityTicks;
+
+        private readonly Thread _timerThread;
+        private long _granularityNs = MinimumGranularity * 100L;
+        private long _granularityTicks;
+        private long _lastTicks = PerformanceCounter.ElapsedTicks;
+        private long _lastId;
+
+        private readonly object _lock = new();
+        private readonly List<WaitingObject> _waitingObjects = new();
+
+        private WindowsGranularTimer()
+        {
+            _timerThread = new Thread(Loop)
+            {
+                IsBackground = true,
+                Name = "Common.WindowsTimer",
+                Priority = ThreadPriority.Highest
+            };
+
+            _timerThread.Start();
+        }
+
+        /// <summary>
+        /// Measure and initialize the timer's target granularity, in both nanoseconds and PerformanceCounter ticks.
+        /// </summary>
+        private void Initialize()
+        {
+            NtQueryTimerResolution(out _, out int min, out int curr);
+
+            if (min > 0)
+            {
+                min = Math.Max(min, MinimumGranularity); // Never request a value below MinimumGranularity.
+
+                _granularityNs = min * 100L; // Resolution values are multiplied by 100 to obtain nanoseconds.
+                NtSetTimerResolution(min, true, out _);
+            }
+            else
+            {
+                _granularityNs = curr * 100L; // 'min' was not positive: use the current resolution and do not change it.
+            }
+
+            _granularityTicks = (_granularityNs * PerformanceCounter.TicksPerMillisecond) / 1_000_000; // Nanoseconds to ticks.
+        }
+
+        /// <summary>
+        /// Main loop for the timer thread. Wakes every clock tick and signals any listeners,
+        /// as well as keeping track of clock alignment.
+        /// </summary>
+        private void Loop()
+        {
+            Initialize();
+            while (true)
+            {
+                long delayInterval = -1; // Next tick
+                NtSetTimerResolution((int)(_granularityNs / 100), true, out _); // Re-requested on every iteration.
+                NtDelayExecution(false, ref delayInterval);
+
+                long newTicks = PerformanceCounter.ElapsedTicks;
+                long nextTicks = newTicks + _granularityTicks; // Forecast of when the following tick will occur.
+
+                lock (_lock)
+                {
+                    for (int i = 0; i < _waitingObjects.Count; i++)
+                    {
+                        if (nextTicks > _waitingObjects[i].TimePoint)
+                        {
+                            // The next clock tick will be after the timepoint, we need to signal now.
+                            _waitingObjects[i].Signal.Set();
+
+                            _waitingObjects.RemoveAt(i--); // Step the index back so the element shifted into this slot is not skipped.
+                        }
+                    }
+
+                    _lastTicks = newTicks; // Read by the sleep methods and ReturnNearestTicks to forecast ticks.
+                }
+            }
+        }
+
+        /// <summary>
+        /// Sleep until a timepoint, or until the event is signalled externally.
+        /// </summary>
+        /// <param name="evt">Reset event to use to be awoken by the clock tick, or an external signal</param>
+        /// <param name="timePoint">Target timepoint</param>
+        /// <returns>True if waited or signalled, false otherwise</returns>
+        public bool SleepUntilTimePoint(AutoResetEvent evt, long timePoint)
+        {
+            if (evt.WaitOne(0)) // Already signalled: consume the signal and return without waiting.
+            {
+                return true;
+            }
+
+            long id;
+
+            lock (_lock)
+            {
+                // Return immediately if the next tick is after the requested timepoint.
+                long nextTicks = _lastTicks + _granularityTicks;
+
+                if (nextTicks > timePoint)
+                {
+                    return false;
+                }
+
+                id = ++_lastId;
+
+                _waitingObjects.Add(new WaitingObject(id, evt, timePoint));
+            }
+
+            evt.WaitOne(); // Woken either by the timer loop or by an external signal.
+
+            lock (_lock) // Drop our entry if it is still queued; the timer loop removes entries it signals itself.
+            {
+                for (int i = 0; i < _waitingObjects.Count; i++)
+                {
+                    if (id == _waitingObjects[i].Id)
+                    {
+                        _waitingObjects.RemoveAt(i--);
+                        break;
+                    }
+                }
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Sleep until a timepoint, but don't expect any external signals.
+        /// </summary>
+        /// <remarks>
+        /// Saves some effort compared to the sleep that expects to be signalled: no pre-check and no cleanup pass.
+        /// </remarks>
+        /// <param name="evt">Reset event to use to be awoken by the clock tick</param>
+        /// <param name="timePoint">Target timepoint</param>
+        /// <returns>True if waited, false otherwise</returns>
+        public bool SleepUntilTimePointWithoutExternalSignal(EventWaitHandle evt, long timePoint)
+        {
+            long id;
+
+            lock (_lock)
+            {
+                // Return immediately if the next tick is after the requested timepoint.
+                long nextTicks = _lastTicks + _granularityTicks;
+
+                if (nextTicks > timePoint)
+                {
+                    return false; // No wait performed.
+                }
+
+                id = ++_lastId;
+
+                _waitingObjects.Add(new WaitingObject(id, evt, timePoint));
+            }
+
+            evt.WaitOne(); // The timer loop sets the event and removes the entry, so nothing is cleaned up here.
+
+            return true;
+        }
+
+        /// <summary>
+        /// Returns the two nearest clock ticks for a given timepoint (both equal it until the granularity is known).
+        /// </summary>
+        /// <param name="timePoint">Target timepoint</param>
+        /// <returns>The nearest clock ticks before and after the given timepoint</returns>
+        public (long, long) ReturnNearestTicks(long timePoint)
+        {
+            long last = _lastTicks;
+            long delta = timePoint - last;
+            long granularity = Math.Max(_granularityTicks, 1); // Still 0 until Initialize has run; avoid dividing by zero.
+
+            long lowTicks = delta / granularity;
+            long highTicks = (delta + granularity - 1) / granularity;
+            return (last + lowTicks * granularity, last + highTicks * granularity);
+        }
+    }
+}
diff --git a/src/Ryujinx.Common/PreciseSleep/WindowsSleepEvent.cs b/src/Ryujinx.Common/PreciseSleep/WindowsSleepEvent.cs
new file mode 100644
index 00000000..87c10d18
--- /dev/null
+++ b/src/Ryujinx.Common/PreciseSleep/WindowsSleepEvent.cs
@@ -0,0 +1,92 @@
+using Ryujinx.Common.SystemInterop;
+using System;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Common.PreciseSleep
+{
+    /// <summary>
+    /// Windows implementation of <see cref="IPreciseSleepEvent"/>. Sleeps are delegated to the
+    /// <see cref="WindowsGranularTimer"/> instance, and timepoints can be snapped onto that timer's ticks.
+    /// </summary>
+    [SupportedOSPlatform("windows")]
+    internal class WindowsSleepEvent : IPreciseSleepEvent
+    {
+        /// <summary>
+        /// Slack, in nanoseconds, added to each sleep target so the clock still waits when its next tick is forecast slightly early.
+        /// </summary>
+        private const long ErrorBias = 50000;
+
+        /// <summary>
+        /// A timeout within this many nanoseconds (0.05ms) of a multiple of the clock granularity is treated as clock aligned.
+        /// </summary>
+        private const long ClockAlignedBias = 50000;
+
+        /// <summary>
+        /// Divisor applied to the clock granularity: an aligned timepoint less than granularity / 4 past a tick is moved back
+        /// to that tick, otherwise it is moved forward to the next. With 0.5ms granularity, 0.1ms rounds down and 0.2ms rounds up.
+        /// </summary>
+        private const long ReverseTimePointFraction = 4;
+
+        private readonly AutoResetEvent _waitEvent = new(false);
+        private readonly WindowsGranularTimer _timer = WindowsGranularTimer.Instance;
+
+        /// <summary>
+        /// When true, <see cref="AdjustTimePoint"/> leaves timepoints untouched.
+        /// </summary>
+        public bool Precise { get; set; } = false;
+
+        /// <summary>
+        /// Realigns a timepoint to a neighbouring timer tick when its timeout is close to a multiple of the clock granularity.
+        /// </summary>
+        public long AdjustTimePoint(long timePoint, long timeoutNs)
+        {
+            if (Precise || timePoint == long.MaxValue)
+            {
+                return timePoint;
+            }
+
+            long granularity = _timer.GranularityNs;
+            long misalignment = timeoutNs % granularity;
+            bool nearMultiple = misalignment < ClockAlignedBias || misalignment > granularity - ClockAlignedBias;
+
+            if (!nearMultiple || timeoutNs <= ClockAlignedBias)
+            {
+                // The timeout does not line up with the host clock, so the timepoint stays exact.
+                return timePoint;
+            }
+
+            (long earlier, long later) = _timer.ReturnNearestTicks(timePoint);
+            long snapBackWindow = _timer.GranularityTicks / ReverseTimePointFraction;
+
+            return timePoint - earlier < snapBackWindow ? earlier : later;
+        }
+
+        /// <summary>
+        /// Sleeps via the timer until the timepoint plus <see cref="ErrorBias"/> (converted from nanoseconds to ticks).
+        /// </summary>
+        public bool SleepUntil(long timePoint)
+        {
+            long biasTicks = (ErrorBias * PerformanceCounter.TicksPerMillisecond) / 1_000_000;
+
+            return _timer.SleepUntilTimePoint(_waitEvent, timePoint + biasTicks);
+        }
+
+        public void Sleep()
+        {
+            // No timepoint involved: blocks until the wait event is set.
+            _waitEvent.WaitOne();
+        }
+
+        public void Signal()
+        {
+            _waitEvent.Set();
+        }
+
+        public void Dispose()
+        {
+            GC.SuppressFinalize(this);
+            _waitEvent.Dispose();
+        }
+    }
+}
diff --git a/src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs b/src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs
index 499bc2c6..3c5fa067 100644
--- a/src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs
+++ b/src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs
@@ -1,4 +1,5 @@
 using Ryujinx.Common;
+using Ryujinx.Common.PreciseSleep;
 using System;
 using System.Collections.Generic;
 using System.Threading;
@@ -23,7 +24,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
 
         private readonly KernelContext _context;
         private readonly List<WaitingObject> _waitingObjects;
-        private AutoResetEvent _waitEvent;
+        private IPreciseSleepEvent _waitEvent;
         private bool _keepRunning;
         private long _enforceWakeupFromSpinWait;
 
@@ -54,6 +55,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
                 timePoint = long.MaxValue;
             }
 
+            timePoint = _waitEvent.AdjustTimePoint(timePoint, timeout);
+
             lock (_context.CriticalSection.Lock)
             {
                 _waitingObjects.Add(new WaitingObject(schedulerObj, timePoint));
@@ -64,7 +67,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
                 }
             }
 
-            _waitEvent.Set();
+            _waitEvent.Signal();
         }
 
         public void UnscheduleFutureInvocation(IKFutureSchedulerObject schedulerObj)
@@ -83,10 +86,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
 
         private void WaitAndCheckScheduledObjects()
         {
-            SpinWait spinWait = new();
             WaitingObject next;
 
-            using (_waitEvent = new AutoResetEvent(false))
+            using (_waitEvent = PreciseSleepHelper.CreateEvent())
             {
                 while (_keepRunning)
                 {
@@ -103,30 +105,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
 
                         if (next.TimePoint > timePoint)
                         {
-                            long ms = Math.Min((next.TimePoint - timePoint) / PerformanceCounter.TicksPerMillisecond, int.MaxValue);
-
-                            if (ms > 0)
-                            {
-                                _waitEvent.WaitOne((int)ms);
-                            }
-                            else
+                            if (!_waitEvent.SleepUntil(next.TimePoint))
                             {
-                                while (Interlocked.Read(ref _enforceWakeupFromSpinWait) != 1 && PerformanceCounter.ElapsedTicks < next.TimePoint)
-                                {
-                                    // Our time is close - don't let SpinWait go off and potentially Thread.Sleep().
-                                    if (spinWait.NextSpinWillYield)
-                                    {
-                                        Thread.Yield();
-
-                                        spinWait.Reset();
-                                    }
-                                    else
-                                    {
-                                        spinWait.SpinOnce();
-                                    }
-                                }
-
-                                spinWait.Reset();
+                                PreciseSleepHelper.SpinWaitUntilTimePoint(next.TimePoint, ref _enforceWakeupFromSpinWait);
                             }
                         }
 
@@ -145,7 +126,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
                     }
                     else
                     {
-                        _waitEvent.WaitOne();
+                        _waitEvent.Sleep();
                     }
                 }
             }
@@ -212,7 +193,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
         public void Dispose()
         {
             _keepRunning = false;
-            _waitEvent?.Set();
+            _waitEvent?.Signal();
         }
     }
 }
diff --git a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs
index d3d9dc03..712d640c 100644
--- a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs
+++ b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs
@@ -1,5 +1,7 @@
-using Ryujinx.Common.Configuration;
+using Ryujinx.Common;
+using Ryujinx.Common.Configuration;
 using Ryujinx.Common.Logging;
+using Ryujinx.Common.PreciseSleep;
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu;
 using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvMap;
@@ -23,9 +25,7 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
 
         private readonly Thread _composerThread;
 
-        private readonly Stopwatch _chrono;
-
-        private readonly ManualResetEvent _event = new(false);
+        private readonly AutoResetEvent _event = new(false);
         private readonly AutoResetEvent _nextFrameEvent = new(true);
         private long _ticks;
         private long _ticksPerFrame;
@@ -64,11 +64,9 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
             _composerThread = new Thread(HandleComposition)
             {
                 Name = "SurfaceFlinger.Composer",
+                Priority = ThreadPriority.AboveNormal
             };
 
-            _chrono = new Stopwatch();
-            _chrono.Start();
-
             _ticks = 0;
             _spinTicks = Stopwatch.Frequency / 500;
             _1msTicks = Stopwatch.Frequency / 1000;
@@ -299,11 +297,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
         {
             _isRunning = true;
 
-            long lastTicks = _chrono.ElapsedTicks;
+            long lastTicks = PerformanceCounter.ElapsedTicks;
 
             while (_isRunning)
             {
-                long ticks = _chrono.ElapsedTicks;
+                long ticks = PerformanceCounter.ElapsedTicks;
 
                 if (_swapInterval == 0)
                 {
@@ -336,21 +334,16 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
                     }
 
                     // Sleep if possible. If the time til the next frame is too low, spin wait instead.
-                    long diff = _ticksPerFrame - (_ticks + _chrono.ElapsedTicks - ticks);
+                    long diff = _ticksPerFrame - (_ticks + PerformanceCounter.ElapsedTicks - ticks);
                     if (diff > 0)
                     {
+                        PreciseSleepHelper.SleepUntilTimePoint(_event, PerformanceCounter.ElapsedTicks + diff);
+
+                        diff = _ticksPerFrame - (_ticks + PerformanceCounter.ElapsedTicks - ticks);
+
                         if (diff < _spinTicks)
                         {
-                            do
-                            {
-                                // SpinWait is a little more HT/SMT friendly than aggressively updating/checking ticks.
-                                // The value of 5 still gives us quite a bit of precision (~0.0003ms variance at worst) while waiting a reasonable amount of time.
-                                Thread.SpinWait(5);
-
-                                ticks = _chrono.ElapsedTicks;
-                                _ticks += ticks - lastTicks;
-                                lastTicks = ticks;
-                            } while (_ticks < _ticksPerFrame);
+                            PreciseSleepHelper.SpinWaitUntilTimePoint(PerformanceCounter.ElapsedTicks + diff);
                         }
                         else
                         {
-- 
cgit v1.2.3-70-g09d2