From a731ab3a2aad56e6ceb8b4e2444a61353246295c Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Thu, 8 Aug 2019 15:56:22 -0300
Subject: Add a new JIT compiler for CPU code (#693)

* Start of the ARMeilleure project

* Refactoring around the old IRAdapter, now renamed to PreAllocator

* Optimize the LowestBitSet method

* Add CLZ support and fix CLS implementation

* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks

* Implement the ByteSwap IR instruction, and some refactoring on the assembler

* Implement the DivideUI IR instruction and fix 64-bit IDIV

* Correct constant operand type on CSINC

* Move division instructions implementation to InstEmitDiv

* Fix destination type for the ConditionalSelect IR instruction

* Implement UMULH and SMULH, with new IR instructions

* Fix some issues with shift instructions

* Fix constant types for BFM instructions

* Fix up new tests using the new V128 struct

* Update tests

* Move DIV tests to a separate file

* Add support for calls, and some instructions that depend on them

* Start adding support for SIMD & FP types, along with some of the related ARM instructions

* Fix some typos and the divide instruction with FP operands

* Fix wrong method call on Clz_V

* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes

* Implement SIMD logical instructions and more misc. fixes

* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations

* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes

* Implement SIMD shift instruction and fix Dup_V

* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table

* Fix check with tolerance on tester

* Implement FP & SIMD comparison instructions, and some fixes

* Update FCVT (Scalar) encoding on the table to support the Half-float variants

* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes

* Use old memory access methods, made a start on SIMD memory insts support, some fixes

* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes

* Fix arguments count with struct return values, other fixes

* More instructions

* Misc. fixes and integrate LDj3SNuD fixes

* Update tests

* Add a faster linear scan allocator, unwinding support on Windows, and other changes

* Update Ryujinx.HLE

* Update Ryujinx.Graphics

* Fix V128 return pointer passing, RCX is clobbered

* Update Ryujinx.Tests

* Update ITimeZoneService

* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks

* Use generic GetFunctionPointerForDelegate method and other tweaks

* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics

* Remove some unused code on the assembler

* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler

* Add hardware capability detection

* Fix regression on Sha1h and revert Fcm** changes

* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator

* Fix silly mistake introduced on last commit on CpuId

* Generate inline stack probes when the stack allocation is too large

* Initial support for the System-V ABI

* Support multiple destination operands

* Fix SSE2 VectorInsert8 path, and other fixes

* Change placement of XMM callee save and restore code to match other compilers

* Rename Dest to Destination and Inst to Instruction

* Fix a regression related to calls and the V128 type

* Add an extra space on comments to match code style

* Some refactoring

* Fix vector insert FP32 SSE2 path

* Port over the ARM32 instructions

* Avoid memory protection races on JIT Cache

* Another fix on VectorInsert FP32 (thanks to LDj3SNuD)

* Float operands don't need to use the same register when VEX is supported

* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks

* Some nits, small improvements on the pre-allocator

* CpuThreadState is gone

* Allow changing CPU emulators with a config entry

* Add runtime identifiers on the ARMeilleure project

* Allow switching between CPUs through a config entry (pt. 2)

* Change win10-x64 to win-x64 on projects

* Update the Ryujinx project to use ARMeilleure

* Ensure that the selected register is valid on the hybrid allocator

* Allow exiting on returns to 0 (should fix test regression)

* Remove register assignments for most used variables on the hybrid allocator

* Do not use fixed registers as spill temp

* Add missing namespace and remove unneeded using

* Address PR feedback

* Fix types, etc

* Enable AssumeStrictAbiCompliance by default

* Ensure that Spill and Fill don't load or store any more than necessary
---
 Ryujinx.Tests/Cpu/CpuTest.cs | 494 ++++++++++++++++++-------------------------
 1 file changed, 206 insertions(+), 288 deletions(-)

(limited to 'Ryujinx.Tests/Cpu/CpuTest.cs')

diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs
index b147cf44..1e7b75c6 100644
--- a/Ryujinx.Tests/Cpu/CpuTest.cs
+++ b/Ryujinx.Tests/Cpu/CpuTest.cs
@@ -1,7 +1,6 @@
-using ChocolArm64;
-using ChocolArm64.Memory;
-using ChocolArm64.State;
-using ChocolArm64.Translation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
 
 using NUnit.Framework;
 
@@ -9,24 +8,24 @@ using Ryujinx.Tests.Unicorn;
 
 using System;
 using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
-using System.Threading;
 
 namespace Ryujinx.Tests.Cpu
 {
     [TestFixture]
     public class CpuTest
     {
-        protected long Position { get; private set; }
-        private long _size;
+        private ulong _currAddress;
+        private long  _size;
 
-        private long _entryPoint;
+        private ulong _entryPoint;
 
         private IntPtr _ramPointer;
 
         private MemoryManager _memory;
-        private CpuThread     _thread;
+
+        private ExecutionContext _context;
+
+        private Translator _translator;
 
         private static bool _unicornAvailable;
         private UnicornAArch64 _unicornEmu;
@@ -44,24 +43,24 @@ namespace Ryujinx.Tests.Cpu
         [SetUp]
         public void Setup()
         {
-            Position = 0x1000;
-            _size    = 0x1000;
+            _currAddress = 0x1000;
+            _size        = 0x1000;
 
-            _entryPoint = Position;
+            _entryPoint = _currAddress;
 
             _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size));
             _memory = new MemoryManager(_ramPointer);
-            _memory.Map(Position, 0, _size);
+            _memory.Map((long)_currAddress, 0, _size);
 
-            Translator translator = new Translator(_memory);
+            _context = new ExecutionContext();
 
-            _thread = new CpuThread(translator, _memory, _entryPoint);
+            _translator = new Translator(_memory);
 
             if (_unicornAvailable)
             {
                 _unicornEmu = new UnicornAArch64();
-                _unicornEmu.MemoryMap((ulong)Position, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC);
-                _unicornEmu.PC = (ulong)_entryPoint;
+                _unicornEmu.MemoryMap(_currAddress, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC);
+                _unicornEmu.PC = _entryPoint;
             }
         }
 
@@ -70,7 +69,8 @@ namespace Ryujinx.Tests.Cpu
         {
             Marshal.FreeHGlobal(_ramPointer);
             _memory     = null;
-            _thread     = null;
+            _context    = null;
+            _translator = null;
             _unicornEmu = null;
         }
 
@@ -82,51 +82,61 @@ namespace Ryujinx.Tests.Cpu
 
         protected void Opcode(uint opcode)
         {
-            _thread.Memory.WriteUInt32(Position, opcode);
+            _memory.WriteUInt32((long)_currAddress, opcode);
 
             if (_unicornAvailable)
             {
-                _unicornEmu.MemoryWrite32((ulong)Position, opcode);
+                _unicornEmu.MemoryWrite32((ulong)_currAddress, opcode);
             }
 
-            Position += 4;
+            _currAddress += 4;
         }
 
-        protected void SetThreadState(ulong x0 = 0, ulong x1 = 0, ulong x2 = 0, ulong x3 = 0, ulong x31 = 0,
-                                      Vector128<float> v0  = default(Vector128<float>),
-                                      Vector128<float> v1  = default(Vector128<float>),
-                                      Vector128<float> v2  = default(Vector128<float>),
-                                      Vector128<float> v3  = default(Vector128<float>),
-                                      Vector128<float> v4  = default(Vector128<float>),
-                                      Vector128<float> v5  = default(Vector128<float>),
-                                      Vector128<float> v30 = default(Vector128<float>),
-                                      Vector128<float> v31 = default(Vector128<float>),
-                                      bool overflow = false, bool carry = false, bool zero = false, bool negative = false,
-                                      int fpcr = 0x0, int fpsr = 0x0)
-        {
-            _thread.ThreadState.X0 = x0;
-            _thread.ThreadState.X1 = x1;
-            _thread.ThreadState.X2 = x2;
-            _thread.ThreadState.X3 = x3;
-
-            _thread.ThreadState.X31 = x31;
-
-            _thread.ThreadState.V0  = v0;
-            _thread.ThreadState.V1  = v1;
-            _thread.ThreadState.V2  = v2;
-            _thread.ThreadState.V3  = v3;
-            _thread.ThreadState.V4  = v4;
-            _thread.ThreadState.V5  = v5;
-            _thread.ThreadState.V30 = v30;
-            _thread.ThreadState.V31 = v31;
-
-            _thread.ThreadState.Overflow = overflow;
-            _thread.ThreadState.Carry    = carry;
-            _thread.ThreadState.Zero     = zero;
-            _thread.ThreadState.Negative = negative;
-
-            _thread.ThreadState.Fpcr = fpcr;
-            _thread.ThreadState.Fpsr = fpsr;
+        protected ExecutionContext GetContext() => _context;
+
+        protected void SetContext(ulong x0       = 0,
+                                  ulong x1       = 0,
+                                  ulong x2       = 0,
+                                  ulong x3       = 0,
+                                  ulong x31      = 0,
+                                  V128  v0       = default(V128),
+                                  V128  v1       = default(V128),
+                                  V128  v2       = default(V128),
+                                  V128  v3       = default(V128),
+                                  V128  v4       = default(V128),
+                                  V128  v5       = default(V128),
+                                  V128  v30      = default(V128),
+                                  V128  v31      = default(V128),
+                                  bool  overflow = false,
+                                  bool  carry    = false,
+                                  bool  zero     = false,
+                                  bool  negative = false,
+                                  int   fpcr     = 0,
+                                  int   fpsr     = 0)
+        {
+            _context.SetX(0, x0);
+            _context.SetX(1, x1);
+            _context.SetX(2, x2);
+            _context.SetX(3, x3);
+
+            _context.SetX(31, x31);
+
+            _context.SetV(0,  v0);
+            _context.SetV(1,  v1);
+            _context.SetV(2,  v2);
+            _context.SetV(3,  v3);
+            _context.SetV(4,  v4);
+            _context.SetV(5,  v5);
+            _context.SetV(30, v30);
+            _context.SetV(31, v31);
+
+            _context.SetPstateFlag(PState.VFlag, overflow);
+            _context.SetPstateFlag(PState.CFlag, carry);
+            _context.SetPstateFlag(PState.ZFlag, zero);
+            _context.SetPstateFlag(PState.NFlag, negative);
+
+            _context.Fpcr = (FPCR)fpcr;
+            _context.Fpsr = (FPSR)fpsr;
 
             if (_unicornAvailable)
             {
@@ -137,14 +147,14 @@ namespace Ryujinx.Tests.Cpu
 
                 _unicornEmu.SP = x31;
 
-                _unicornEmu.Q[0]  = v0;
-                _unicornEmu.Q[1]  = v1;
-                _unicornEmu.Q[2]  = v2;
-                _unicornEmu.Q[3]  = v3;
-                _unicornEmu.Q[4]  = v4;
-                _unicornEmu.Q[5]  = v5;
-                _unicornEmu.Q[30] = v30;
-                _unicornEmu.Q[31] = v31;
+                _unicornEmu.Q[0]  = V128ToSimdValue(v0);
+                _unicornEmu.Q[1]  = V128ToSimdValue(v1);
+                _unicornEmu.Q[2]  = V128ToSimdValue(v2);
+                _unicornEmu.Q[3]  = V128ToSimdValue(v3);
+                _unicornEmu.Q[4]  = V128ToSimdValue(v4);
+                _unicornEmu.Q[5]  = V128ToSimdValue(v5);
+                _unicornEmu.Q[30] = V128ToSimdValue(v30);
+                _unicornEmu.Q[31] = V128ToSimdValue(v31);
 
                 _unicornEmu.OverflowFlag = overflow;
                 _unicornEmu.CarryFlag    = carry;
@@ -158,43 +168,41 @@ namespace Ryujinx.Tests.Cpu
 
         protected void ExecuteOpcodes()
         {
-            using (ManualResetEvent wait = new ManualResetEvent(false))
-            {
-                _thread.ThreadState.Break += (sender, e) => _thread.StopExecution();
-                _thread.WorkFinished += (sender, e) => wait.Set();
-
-                _thread.Execute();
-                wait.WaitOne();
-            }
+            _translator.Execute(_context, _entryPoint);
 
             if (_unicornAvailable)
             {
-                _unicornEmu.RunForCount((ulong)(Position - _entryPoint - 8) / 4);
+                _unicornEmu.RunForCount((ulong)(_currAddress - _entryPoint - 4) / 4);
             }
         }
 
-        protected CpuThreadState GetThreadState() => _thread.ThreadState;
-
-        protected CpuThreadState SingleOpcode(uint opcode,
-                                              ulong x0 = 0, ulong x1 = 0, ulong x2 = 0, ulong x3 = 0, ulong x31 = 0,
-                                              Vector128<float> v0  = default(Vector128<float>),
-                                              Vector128<float> v1  = default(Vector128<float>),
-                                              Vector128<float> v2  = default(Vector128<float>),
-                                              Vector128<float> v3  = default(Vector128<float>),
-                                              Vector128<float> v4  = default(Vector128<float>),
-                                              Vector128<float> v5  = default(Vector128<float>),
-                                              Vector128<float> v30 = default(Vector128<float>),
-                                              Vector128<float> v31 = default(Vector128<float>),
-                                              bool overflow = false, bool carry = false, bool zero = false, bool negative = false,
-                                              int fpcr = 0x0, int fpsr = 0x0)
+        protected ExecutionContext SingleOpcode(uint  opcode,
+                                                ulong x0       = 0,
+                                                ulong x1       = 0,
+                                                ulong x2       = 0,
+                                                ulong x3       = 0,
+                                                ulong x31      = 0,
+                                                V128  v0       = default(V128),
+                                                V128  v1       = default(V128),
+                                                V128  v2       = default(V128),
+                                                V128  v3       = default(V128),
+                                                V128  v4       = default(V128),
+                                                V128  v5       = default(V128),
+                                                V128  v30      = default(V128),
+                                                V128  v31      = default(V128),
+                                                bool  overflow = false,
+                                                bool  carry    = false,
+                                                bool  zero     = false,
+                                                bool  negative = false,
+                                                int   fpcr     = 0,
+                                                int   fpsr     = 0)
         {
             Opcode(opcode);
-            Opcode(0xD4200000); // BRK #0
             Opcode(0xD65F03C0); // RET
-            SetThreadState(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr);
+            SetContext(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr);
             ExecuteOpcodes();
 
-            return GetThreadState();
+            return GetContext();
         }
 
         /// <summary>Rounding Mode control field.</summary>
@@ -279,101 +287,101 @@ namespace Ryujinx.Tests.Cpu
                 ManageFpSkips(fpSkips);
             }
 
-            Assert.That(_thread.ThreadState.X0,  Is.EqualTo(_unicornEmu.X[0]));
-            Assert.That(_thread.ThreadState.X1,  Is.EqualTo(_unicornEmu.X[1]));
-            Assert.That(_thread.ThreadState.X2,  Is.EqualTo(_unicornEmu.X[2]));
-            Assert.That(_thread.ThreadState.X3,  Is.EqualTo(_unicornEmu.X[3]));
-            Assert.That(_thread.ThreadState.X4,  Is.EqualTo(_unicornEmu.X[4]));
-            Assert.That(_thread.ThreadState.X5,  Is.EqualTo(_unicornEmu.X[5]));
-            Assert.That(_thread.ThreadState.X6,  Is.EqualTo(_unicornEmu.X[6]));
-            Assert.That(_thread.ThreadState.X7,  Is.EqualTo(_unicornEmu.X[7]));
-            Assert.That(_thread.ThreadState.X8,  Is.EqualTo(_unicornEmu.X[8]));
-            Assert.That(_thread.ThreadState.X9,  Is.EqualTo(_unicornEmu.X[9]));
-            Assert.That(_thread.ThreadState.X10, Is.EqualTo(_unicornEmu.X[10]));
-            Assert.That(_thread.ThreadState.X11, Is.EqualTo(_unicornEmu.X[11]));
-            Assert.That(_thread.ThreadState.X12, Is.EqualTo(_unicornEmu.X[12]));
-            Assert.That(_thread.ThreadState.X13, Is.EqualTo(_unicornEmu.X[13]));
-            Assert.That(_thread.ThreadState.X14, Is.EqualTo(_unicornEmu.X[14]));
-            Assert.That(_thread.ThreadState.X15, Is.EqualTo(_unicornEmu.X[15]));
-            Assert.That(_thread.ThreadState.X16, Is.EqualTo(_unicornEmu.X[16]));
-            Assert.That(_thread.ThreadState.X17, Is.EqualTo(_unicornEmu.X[17]));
-            Assert.That(_thread.ThreadState.X18, Is.EqualTo(_unicornEmu.X[18]));
-            Assert.That(_thread.ThreadState.X19, Is.EqualTo(_unicornEmu.X[19]));
-            Assert.That(_thread.ThreadState.X20, Is.EqualTo(_unicornEmu.X[20]));
-            Assert.That(_thread.ThreadState.X21, Is.EqualTo(_unicornEmu.X[21]));
-            Assert.That(_thread.ThreadState.X22, Is.EqualTo(_unicornEmu.X[22]));
-            Assert.That(_thread.ThreadState.X23, Is.EqualTo(_unicornEmu.X[23]));
-            Assert.That(_thread.ThreadState.X24, Is.EqualTo(_unicornEmu.X[24]));
-            Assert.That(_thread.ThreadState.X25, Is.EqualTo(_unicornEmu.X[25]));
-            Assert.That(_thread.ThreadState.X26, Is.EqualTo(_unicornEmu.X[26]));
-            Assert.That(_thread.ThreadState.X27, Is.EqualTo(_unicornEmu.X[27]));
-            Assert.That(_thread.ThreadState.X28, Is.EqualTo(_unicornEmu.X[28]));
-            Assert.That(_thread.ThreadState.X29, Is.EqualTo(_unicornEmu.X[29]));
-            Assert.That(_thread.ThreadState.X30, Is.EqualTo(_unicornEmu.X[30]));
-
-            Assert.That(_thread.ThreadState.X31, Is.EqualTo(_unicornEmu.SP));
+            Assert.That(_context.GetX(0),  Is.EqualTo(_unicornEmu.X[0]));
+            Assert.That(_context.GetX(1),  Is.EqualTo(_unicornEmu.X[1]));
+            Assert.That(_context.GetX(2),  Is.EqualTo(_unicornEmu.X[2]));
+            Assert.That(_context.GetX(3),  Is.EqualTo(_unicornEmu.X[3]));
+            Assert.That(_context.GetX(4),  Is.EqualTo(_unicornEmu.X[4]));
+            Assert.That(_context.GetX(5),  Is.EqualTo(_unicornEmu.X[5]));
+            Assert.That(_context.GetX(6),  Is.EqualTo(_unicornEmu.X[6]));
+            Assert.That(_context.GetX(7),  Is.EqualTo(_unicornEmu.X[7]));
+            Assert.That(_context.GetX(8),  Is.EqualTo(_unicornEmu.X[8]));
+            Assert.That(_context.GetX(9),  Is.EqualTo(_unicornEmu.X[9]));
+            Assert.That(_context.GetX(10), Is.EqualTo(_unicornEmu.X[10]));
+            Assert.That(_context.GetX(11), Is.EqualTo(_unicornEmu.X[11]));
+            Assert.That(_context.GetX(12), Is.EqualTo(_unicornEmu.X[12]));
+            Assert.That(_context.GetX(13), Is.EqualTo(_unicornEmu.X[13]));
+            Assert.That(_context.GetX(14), Is.EqualTo(_unicornEmu.X[14]));
+            Assert.That(_context.GetX(15), Is.EqualTo(_unicornEmu.X[15]));
+            Assert.That(_context.GetX(16), Is.EqualTo(_unicornEmu.X[16]));
+            Assert.That(_context.GetX(17), Is.EqualTo(_unicornEmu.X[17]));
+            Assert.That(_context.GetX(18), Is.EqualTo(_unicornEmu.X[18]));
+            Assert.That(_context.GetX(19), Is.EqualTo(_unicornEmu.X[19]));
+            Assert.That(_context.GetX(20), Is.EqualTo(_unicornEmu.X[20]));
+            Assert.That(_context.GetX(21), Is.EqualTo(_unicornEmu.X[21]));
+            Assert.That(_context.GetX(22), Is.EqualTo(_unicornEmu.X[22]));
+            Assert.That(_context.GetX(23), Is.EqualTo(_unicornEmu.X[23]));
+            Assert.That(_context.GetX(24), Is.EqualTo(_unicornEmu.X[24]));
+            Assert.That(_context.GetX(25), Is.EqualTo(_unicornEmu.X[25]));
+            Assert.That(_context.GetX(26), Is.EqualTo(_unicornEmu.X[26]));
+            Assert.That(_context.GetX(27), Is.EqualTo(_unicornEmu.X[27]));
+            Assert.That(_context.GetX(28), Is.EqualTo(_unicornEmu.X[28]));
+            Assert.That(_context.GetX(29), Is.EqualTo(_unicornEmu.X[29]));
+            Assert.That(_context.GetX(30), Is.EqualTo(_unicornEmu.X[30]));
+
+            Assert.That(_context.GetX(31), Is.EqualTo(_unicornEmu.SP));
 
             if (fpTolerances == FpTolerances.None)
             {
-                Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0]));
             }
             else
             {
                 ManageFpTolerances(fpTolerances);
             }
-            Assert.That(_thread.ThreadState.V1,  Is.EqualTo(_unicornEmu.Q[1]));
-            Assert.That(_thread.ThreadState.V2,  Is.EqualTo(_unicornEmu.Q[2]));
-            Assert.That(_thread.ThreadState.V3,  Is.EqualTo(_unicornEmu.Q[3]));
-            Assert.That(_thread.ThreadState.V4,  Is.EqualTo(_unicornEmu.Q[4]));
-            Assert.That(_thread.ThreadState.V5,  Is.EqualTo(_unicornEmu.Q[5]));
-            Assert.That(_thread.ThreadState.V6,  Is.EqualTo(_unicornEmu.Q[6]));
-            Assert.That(_thread.ThreadState.V7,  Is.EqualTo(_unicornEmu.Q[7]));
-            Assert.That(_thread.ThreadState.V8,  Is.EqualTo(_unicornEmu.Q[8]));
-            Assert.That(_thread.ThreadState.V9,  Is.EqualTo(_unicornEmu.Q[9]));
-            Assert.That(_thread.ThreadState.V10, Is.EqualTo(_unicornEmu.Q[10]));
-            Assert.That(_thread.ThreadState.V11, Is.EqualTo(_unicornEmu.Q[11]));
-            Assert.That(_thread.ThreadState.V12, Is.EqualTo(_unicornEmu.Q[12]));
-            Assert.That(_thread.ThreadState.V13, Is.EqualTo(_unicornEmu.Q[13]));
-            Assert.That(_thread.ThreadState.V14, Is.EqualTo(_unicornEmu.Q[14]));
-            Assert.That(_thread.ThreadState.V15, Is.EqualTo(_unicornEmu.Q[15]));
-            Assert.That(_thread.ThreadState.V16, Is.EqualTo(_unicornEmu.Q[16]));
-            Assert.That(_thread.ThreadState.V17, Is.EqualTo(_unicornEmu.Q[17]));
-            Assert.That(_thread.ThreadState.V18, Is.EqualTo(_unicornEmu.Q[18]));
-            Assert.That(_thread.ThreadState.V19, Is.EqualTo(_unicornEmu.Q[19]));
-            Assert.That(_thread.ThreadState.V20, Is.EqualTo(_unicornEmu.Q[20]));
-            Assert.That(_thread.ThreadState.V21, Is.EqualTo(_unicornEmu.Q[21]));
-            Assert.That(_thread.ThreadState.V22, Is.EqualTo(_unicornEmu.Q[22]));
-            Assert.That(_thread.ThreadState.V23, Is.EqualTo(_unicornEmu.Q[23]));
-            Assert.That(_thread.ThreadState.V24, Is.EqualTo(_unicornEmu.Q[24]));
-            Assert.That(_thread.ThreadState.V25, Is.EqualTo(_unicornEmu.Q[25]));
-            Assert.That(_thread.ThreadState.V26, Is.EqualTo(_unicornEmu.Q[26]));
-            Assert.That(_thread.ThreadState.V27, Is.EqualTo(_unicornEmu.Q[27]));
-            Assert.That(_thread.ThreadState.V28, Is.EqualTo(_unicornEmu.Q[28]));
-            Assert.That(_thread.ThreadState.V29, Is.EqualTo(_unicornEmu.Q[29]));
-            Assert.That(_thread.ThreadState.V30, Is.EqualTo(_unicornEmu.Q[30]));
-            Assert.That(_thread.ThreadState.V31, Is.EqualTo(_unicornEmu.Q[31]));
-
-            Assert.That(_thread.ThreadState.Fpcr,                 Is.EqualTo(_unicornEmu.Fpcr));
-            Assert.That(_thread.ThreadState.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask));
-
-            Assert.That(_thread.ThreadState.Overflow, Is.EqualTo(_unicornEmu.OverflowFlag));
-            Assert.That(_thread.ThreadState.Carry,    Is.EqualTo(_unicornEmu.CarryFlag));
-            Assert.That(_thread.ThreadState.Zero,     Is.EqualTo(_unicornEmu.ZeroFlag));
-            Assert.That(_thread.ThreadState.Negative, Is.EqualTo(_unicornEmu.NegativeFlag));
+            Assert.That(V128ToSimdValue(_context.GetV(1)),  Is.EqualTo(_unicornEmu.Q[1]));
+            Assert.That(V128ToSimdValue(_context.GetV(2)),  Is.EqualTo(_unicornEmu.Q[2]));
+            Assert.That(V128ToSimdValue(_context.GetV(3)),  Is.EqualTo(_unicornEmu.Q[3]));
+            Assert.That(V128ToSimdValue(_context.GetV(4)),  Is.EqualTo(_unicornEmu.Q[4]));
+            Assert.That(V128ToSimdValue(_context.GetV(5)),  Is.EqualTo(_unicornEmu.Q[5]));
+            Assert.That(V128ToSimdValue(_context.GetV(6)),  Is.EqualTo(_unicornEmu.Q[6]));
+            Assert.That(V128ToSimdValue(_context.GetV(7)),  Is.EqualTo(_unicornEmu.Q[7]));
+            Assert.That(V128ToSimdValue(_context.GetV(8)),  Is.EqualTo(_unicornEmu.Q[8]));
+            Assert.That(V128ToSimdValue(_context.GetV(9)),  Is.EqualTo(_unicornEmu.Q[9]));
+            Assert.That(V128ToSimdValue(_context.GetV(10)), Is.EqualTo(_unicornEmu.Q[10]));
+            Assert.That(V128ToSimdValue(_context.GetV(11)), Is.EqualTo(_unicornEmu.Q[11]));
+            Assert.That(V128ToSimdValue(_context.GetV(12)), Is.EqualTo(_unicornEmu.Q[12]));
+            Assert.That(V128ToSimdValue(_context.GetV(13)), Is.EqualTo(_unicornEmu.Q[13]));
+            Assert.That(V128ToSimdValue(_context.GetV(14)), Is.EqualTo(_unicornEmu.Q[14]));
+            Assert.That(V128ToSimdValue(_context.GetV(15)), Is.EqualTo(_unicornEmu.Q[15]));
+            Assert.That(V128ToSimdValue(_context.GetV(16)), Is.EqualTo(_unicornEmu.Q[16]));
+            Assert.That(V128ToSimdValue(_context.GetV(17)), Is.EqualTo(_unicornEmu.Q[17]));
+            Assert.That(V128ToSimdValue(_context.GetV(18)), Is.EqualTo(_unicornEmu.Q[18]));
+            Assert.That(V128ToSimdValue(_context.GetV(19)), Is.EqualTo(_unicornEmu.Q[19]));
+            Assert.That(V128ToSimdValue(_context.GetV(20)), Is.EqualTo(_unicornEmu.Q[20]));
+            Assert.That(V128ToSimdValue(_context.GetV(21)), Is.EqualTo(_unicornEmu.Q[21]));
+            Assert.That(V128ToSimdValue(_context.GetV(22)), Is.EqualTo(_unicornEmu.Q[22]));
+            Assert.That(V128ToSimdValue(_context.GetV(23)), Is.EqualTo(_unicornEmu.Q[23]));
+            Assert.That(V128ToSimdValue(_context.GetV(24)), Is.EqualTo(_unicornEmu.Q[24]));
+            Assert.That(V128ToSimdValue(_context.GetV(25)), Is.EqualTo(_unicornEmu.Q[25]));
+            Assert.That(V128ToSimdValue(_context.GetV(26)), Is.EqualTo(_unicornEmu.Q[26]));
+            Assert.That(V128ToSimdValue(_context.GetV(27)), Is.EqualTo(_unicornEmu.Q[27]));
+            Assert.That(V128ToSimdValue(_context.GetV(28)), Is.EqualTo(_unicornEmu.Q[28]));
+            Assert.That(V128ToSimdValue(_context.GetV(29)), Is.EqualTo(_unicornEmu.Q[29]));
+            Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30]));
+            Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31]));
+
+            Assert.That((int)_context.Fpcr,                 Is.EqualTo(_unicornEmu.Fpcr));
+            Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask));
+
+            Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag));
+            Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag));
+            Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag));
+            Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag));
         }
 
         private void ManageFpSkips(FpSkips fpSkips)
         {
             if (fpSkips.HasFlag(FpSkips.IfNaNS))
             {
-                if (float.IsNaN(VectorExtractSingle(_unicornEmu.Q[0], (byte)0)))
+                if (float.IsNaN(_unicornEmu.Q[0].AsFloat()))
                 {
                     Assert.Ignore("NaN test.");
                 }
             }
             else if (fpSkips.HasFlag(FpSkips.IfNaND))
             {
-                if (double.IsNaN(VectorExtractDouble(_unicornEmu.Q[0], (byte)0)))
+                if (double.IsNaN(_unicornEmu.Q[0].AsDouble()))
                 {
                     Assert.Ignore("NaN test.");
                 }
@@ -398,158 +406,68 @@ namespace Ryujinx.Tests.Cpu
 
         private void ManageFpTolerances(FpTolerances fpTolerances)
         {
-            if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(_thread.ThreadState.V0).IsSuccess)
+            bool IsNormalOrSubnormalS(float f)  => float.IsNormal(f)  || float.IsSubnormal(f); // local predicate: true for normal or subnormal floats (zero, infinity and NaN are neither)
+            bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d); // double-precision counterpart of the predicate above
+
+            if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(V128ToSimdValue(_context.GetV(0))).IsSuccess) // tolerances are only considered when V0 is not exactly equal to _unicornEmu.Q[0]
             {
                 if (fpTolerances == FpTolerances.UpToOneUlpsS)
                 {
-                    if (IsNormalOrSubnormalS(VectorExtractSingle(_unicornEmu.Q[0],       (byte)0)) &&
-                        IsNormalOrSubnormalS(VectorExtractSingle(_thread.ThreadState.V0, (byte)0)))
+                    if (IsNormalOrSubnormalS(_unicornEmu.Q[0].AsFloat()) && // NOTE(review): only the AsFloat() value of each side is classified (presumably lane 0 - confirm), yet all four lanes are asserted below
+                        IsNormalOrSubnormalS(_context.GetV(0).AsFloat()))
                     {
-                        Assert.That   (VectorExtractSingle(_thread.ThreadState.V0, (byte)0),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0],       (byte)0)).Within(1).Ulps);
-                        Assert.That   (VectorExtractSingle(_thread.ThreadState.V0, (byte)1),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0],       (byte)1)).Within(1).Ulps);
-                        Assert.That   (VectorExtractSingle(_thread.ThreadState.V0, (byte)2),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0],       (byte)2)).Within(1).Ulps);
-                        Assert.That   (VectorExtractSingle(_thread.ThreadState.V0, (byte)3),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0],       (byte)3)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetFloat(0), // each of the four float lanes (indices 0..3) must be within 1 ULP of the reference
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetFloat(1),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetFloat(2),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetFloat(3),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps);
 
                         Console.WriteLine(fpTolerances);
                     }
                     else
                     {
-                        Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                        Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); // no tolerance applies: fall back to the exact comparison
                     }
                 }
 
                 if (fpTolerances == FpTolerances.UpToOneUlpsD)
                 {
-                    if (IsNormalOrSubnormalD(VectorExtractDouble(_unicornEmu.Q[0],       (byte)0)) &&
-                        IsNormalOrSubnormalD(VectorExtractDouble(_thread.ThreadState.V0, (byte)0)))
+                    if (IsNormalOrSubnormalD(_unicornEmu.Q[0].AsDouble()) && // same single-value gate for doubles (AsDouble(); presumably lane 0 - confirm)
+                        IsNormalOrSubnormalD(_context.GetV(0).AsDouble()))
                     {
-                        Assert.That   (VectorExtractDouble(_thread.ThreadState.V0, (byte)0),
-                            Is.EqualTo(VectorExtractDouble(_unicornEmu.Q[0],       (byte)0)).Within(1).Ulps);
-                        Assert.That   (VectorExtractDouble(_thread.ThreadState.V0, (byte)1),
-                            Is.EqualTo(VectorExtractDouble(_unicornEmu.Q[0],       (byte)1)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetDouble(0), // both double lanes (indices 0 and 1) must be within 1 ULP of the reference
+                            Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetDouble(1),
+                            Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps);
 
                         Console.WriteLine(fpTolerances);
                     }
                     else
                     {
-                        Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                        Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); // no tolerance applies: fall back to the exact comparison
                     }
                 }
             }
-
-            bool IsNormalOrSubnormalS(float f)  => float.IsNormal(f)  || float.IsSubnormal(f);
-
-            bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d);
-        }
-
-        protected static Vector128<float> MakeVectorE0(double e0)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<long, float>(Sse2.SetVector128(0, BitConverter.DoubleToInt64Bits(e0)));
-        }
-
-        protected static Vector128<float> MakeVectorE0E1(double e0, double e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<long, float>(
-                Sse2.SetVector128(BitConverter.DoubleToInt64Bits(e1), BitConverter.DoubleToInt64Bits(e0)));
-        }
-
-        protected static Vector128<float> MakeVectorE1(double e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<long, float>(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(e1), 0));
-        }
-
-        protected static float VectorExtractSingle(Vector128<float> vector, byte index)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            int value = Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
-
-            return BitConverter.Int32BitsToSingle(value);
-        }
-
-        protected static double VectorExtractDouble(Vector128<float> vector, byte index)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            long value = Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
-
-            return BitConverter.Int64BitsToDouble(value);
-        }
-
-        protected static Vector128<float> MakeVectorE0(ulong e0)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(0, e0));
-        }
-
-        protected static Vector128<float> MakeVectorE0E1(ulong e0, ulong e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(e1, e0));
         }
 
-        protected static Vector128<float> MakeVectorE1(ulong e1)
+        private static SimdValue V128ToSimdValue(V128 value) // repackages a V128 (as returned by _context.GetV) as a SimdValue so it can be compared with _unicornEmu.Q[n]
         {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(e1, 0));
+            return new SimdValue(value.GetUInt64(0), value.GetUInt64(1)); // 64-bit elements 0 and 1, passed to the constructor in that order
         }
 
-        protected static ulong GetVectorE0(Vector128<float> vector)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
+        protected static V128 MakeVectorScalar(float value)  => new V128(value); // V128 built from one float; remaining lanes presumably zero - confirm against the V128(float) constructor
+        protected static V128 MakeVectorScalar(double value) => new V128(value); // same for one double - confirm against the V128(double) constructor
 
-            return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), (byte)0);
-        }
+        protected static V128 MakeVectorE0(ulong e0) => new V128(e0, 0); // e0 as first constructor argument, 0 as second (presumably element 0 / element 1 - confirm)
+        protected static V128 MakeVectorE1(ulong e1) => new V128(0, e1); // 0 as first constructor argument, e1 as second
 
-        protected static ulong GetVectorE1(Vector128<float> vector)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
+        protected static V128 MakeVectorE0E1(ulong e0, ulong e1) => new V128(e0, e1); // both 64-bit values supplied to the constructor, e0 first
 
-            return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), (byte)1);
-        }
+        protected static ulong GetVectorE0(V128 vector) => vector.GetUInt64(0); // reads the 64-bit element at index 0
+        protected static ulong GetVectorE1(V128 vector) => vector.GetUInt64(1); // reads the 64-bit element at index 1
 
         protected static ushort GenNormalH()
         {
-- 
cgit v1.2.3-70-g09d2