From 5e0f8e873857ce3ca3f532aff0936beb28e412c8 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Tue, 10 Jan 2023 19:16:59 -0300
Subject: Implement JIT Arm64 backend (#4114)

* Implement JIT Arm64 backend

* PPTC version bump

* Address some feedback from Arm64 JIT PR

* Address even more PR feedback

* Remove unused IsPageAligned function

* Sync Qc flag before calls

* Fix comment and remove unused enum

* Address riperiperi PR feedback

* Delete Breakpoint IR instruction that was only implemented for Arm64
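
The AdvSimd paths added throughout this patch all follow the same
dispatch shape: when the host supports AdvSimd, emit the matching
Arm64 intrinsic through the new InstEmitSimdHelperArm64 helpers;
otherwise fall through to the existing SSE fast paths and soft-float
fallbacks. A minimal sketch of that shape, using Fadd_S from this
diff (the final soft-float branch is the pre-existing fallback from
the surrounding file, shown here for completeness, not changed by
this patch):

    public static void Fadd_S(ArmEmitterContext context)
    {
        if (Optimizations.UseAdvSimd)
        {
            // New: emit the native AdvSimd FADD (scalar) intrinsic.
            InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS);
        }
        else if (Optimizations.FastFP && Optimizations.UseSse2)
        {
            // Pre-existing x86 fast path.
            EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
        }
        else
        {
            // Pre-existing soft-float fallback (full IEEE 754 semantics).
            EmitScalarBinaryOpF(context, (op1, op2) =>
            {
                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
            });
        }
    }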
---
 ARMeilleure/Instructions/InstEmitSimdArithmetic.cs | 1494 ++++++++++++++++----
 .../Instructions/InstEmitSimdArithmetic32.cs       |  229 ++-
 ARMeilleure/Instructions/InstEmitSimdCmp.cs        |   18 +-
 ARMeilleure/Instructions/InstEmitSimdCmp32.cs      |   66 +-
 ARMeilleure/Instructions/InstEmitSimdCvt.cs        |  294 +++-
 ARMeilleure/Instructions/InstEmitSimdCvt32.cs      |  103 +-
 .../Instructions/InstEmitSimdHelper32Arm64.cs      |  366 +++++
 .../Instructions/InstEmitSimdHelperArm64.cs        |  720 ++++++++++
 ARMeilleure/Instructions/InstEmitSimdLogical.cs    |   54 +-
 ARMeilleure/Instructions/InstEmitSimdLogical32.cs  |   54 +-
 ARMeilleure/Instructions/InstEmitSimdMove32.cs     |   58 +-
 ARMeilleure/Instructions/InstEmitSimdShift.cs      |  517 ++++++-
 ARMeilleure/Instructions/InstEmitSystem.cs         |    4 +
 13 files changed, 3517 insertions(+), 460 deletions(-)
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs

diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index b91c522e..3e65db23 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -21,22 +21,47 @@ namespace ARMeilleure.Instructions
     {
         public static void Abs_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS);
+            }
+            else
+            {
+                EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }
 
         public static void Abs_V(ArmEmitterContext context)
         {
-            EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV);
+            }
+            else
+            {
+                EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }
 
         public static void Add_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS);
+            }
+            else
+            {
+                EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }
 
         public static void Add_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -62,24 +87,42 @@ namespace ARMeilleure.Instructions
 
         public static void Addhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+            }
         }
 
         public static void Addp_S(ArmEmitterContext context)
         {
-            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS);
+            }
+            else
+            {
+                OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
-            Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
-            Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
+                Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+                Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
 
-            Operand res = context.Add(ne0, ne1);
+                Operand res = context.Add(ne0, ne1);
 
-            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
+                context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
+            }
         }
 
         public static void Addp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PaddInstruction);
             }
@@ -91,68 +134,89 @@ namespace ARMeilleure.Instructions
 
         public static void Addv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }
 
         public static void Cls_V(ArmEmitterContext context)
         {
-            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV);
+            }
+            else
+            {
+                OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
-            Operand res = context.VectorZero();
+                Operand res = context.VectorZero();
 
-            int elems = op.GetBytesCount() >> op.Size;
+                int elems = op.GetBytesCount() >> op.Size;
 
-            int eSize = 8 << op.Size;
+                int eSize = 8 << op.Size;
 
-            for (int index = 0; index < elems; index++)
-            {
-                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+                for (int index = 0; index < elems; index++)
+                {
+                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
 
-                Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize));
+                    Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize));
 
-                res = EmitVectorInsert(context, res, de, index, op.Size);
-            }
+                    res = EmitVectorInsert(context, res, de, index, op.Size);
+                }
 
-            context.Copy(GetVec(op.Rd), res);
+                context.Copy(GetVec(op.Rd), res);
+            }
         }
 
         public static void Clz_V(ArmEmitterContext context)
         {
-            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
-
-            int eSize = 8 << op.Size;
-
-            Operand res = eSize switch {
-                8  => Clz_V_I8 (context, GetVec(op.Rn)),
-                16 => Clz_V_I16(context, GetVec(op.Rn)),
-                32 => Clz_V_I32(context, GetVec(op.Rn)),
-                _  => default
-            };
-
-            if (res != default)
+            if (Optimizations.UseAdvSimd)
             {
-                if (op.RegisterSize == RegisterSize.Simd64)
-                {
-                    res = context.VectorZeroUpper64(res);
-                }
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV);
             }
             else
             {
-                int elems = op.GetBytesCount() >> op.Size;
+                OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
-                res = context.VectorZero();
+                int eSize = 8 << op.Size;
 
-                for (int index = 0; index < elems; index++)
+                Operand res = eSize switch {
+                    8  => Clz_V_I8 (context, GetVec(op.Rn)),
+                    16 => Clz_V_I16(context, GetVec(op.Rn)),
+                    32 => Clz_V_I32(context, GetVec(op.Rn)),
+                    _  => default
+                };
+
+                if (res != default)
                 {
-                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+                    if (op.RegisterSize == RegisterSize.Simd64)
+                    {
+                        res = context.VectorZeroUpper64(res);
+                    }
+                }
+                else
+                {
+                    int elems = op.GetBytesCount() >> op.Size;
 
-                    Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize));
+                    res = context.VectorZero();
 
-                    res = EmitVectorInsert(context, res, de, index, op.Size);
+                    for (int index = 0; index < elems; index++)
+                    {
+                        Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+                        Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize));
+
+                        res = EmitVectorInsert(context, res, de, index, op.Size);
+                    }
                 }
-            }
 
-            context.Copy(GetVec(op.Rd), res);
+                context.Copy(GetVec(op.Rd), res);
+            }
         }
 
         private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg)
@@ -271,36 +335,47 @@ namespace ARMeilleure.Instructions
 
         public static void Cnt_V(ArmEmitterContext context)
         {
-            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV);
+            }
+            else
+            {
+                OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
-            Operand res = context.VectorZero();
+                Operand res = context.VectorZero();
 
-            int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+                int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
 
-            for (int index = 0; index < elems; index++)
-            {
-                Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+                for (int index = 0; index < elems; index++)
+                {
+                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
 
-                Operand de;
+                    Operand de;
 
-                if (Optimizations.UsePopCnt)
-                {
-                    de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
-                }
-                else
-                {
-                    de = EmitCountSetBits8(context, ne);
+                    if (Optimizations.UsePopCnt)
+                    {
+                        de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
+                    }
+                    else
+                    {
+                        de = EmitCountSetBits8(context, ne);
+                    }
+
+                    res = EmitVectorInsert(context, res, de, index, 0);
                 }
 
-                res = EmitVectorInsert(context, res, de, index, 0);
+                context.Copy(GetVec(op.Rd), res);
             }
-
-            context.Copy(GetVec(op.Rd), res);
         }
 
         public static void Fabd_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -336,7 +411,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fabd_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -377,7 +456,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fabs_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -405,7 +488,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fabs_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -440,7 +527,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fadd_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
             }
@@ -459,7 +550,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fadd_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
             }
@@ -478,7 +573,11 @@ namespace ARMeilleure.Instructions
 
         public static void Faddp_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse3)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -506,7 +605,11 @@ namespace ARMeilleure.Instructions
 
         public static void Faddp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -534,7 +637,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fdiv_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
             }
@@ -553,7 +660,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fdiv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
             }
@@ -572,7 +683,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmadd_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -607,7 +722,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmax_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
                 {
@@ -628,7 +747,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmax_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
                 {
@@ -649,7 +772,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnm_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true);
             }
@@ -664,7 +791,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnm_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false);
             }
@@ -679,7 +810,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnmp_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
                 {
@@ -697,7 +832,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnmp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -715,7 +854,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxnmv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
                 {
@@ -733,7 +876,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -757,7 +904,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmaxv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxvV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
                 {
@@ -781,7 +932,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmin_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
                 {
@@ -802,7 +957,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmin_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
                 {
@@ -823,7 +982,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnm_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true);
             }
@@ -838,7 +1001,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnm_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false);
             }
@@ -853,7 +1020,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnmp_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
                 {
@@ -871,7 +1042,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnmp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -889,7 +1064,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminnmv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
                 {
@@ -907,7 +1086,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminp_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
                 {
@@ -931,7 +1114,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fminv_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
                 {
@@ -955,15 +1142,26 @@ namespace ARMeilleure.Instructions
 
         public static void Fmla_Se(ArmEmitterContext context) // Fused.
         {
-            EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe);
+            }
+            else
+            {
+                EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Fmla_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1006,7 +1204,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmla_Ve(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 
@@ -1055,15 +1257,26 @@ namespace ARMeilleure.Instructions
 
         public static void Fmls_Se(ArmEmitterContext context) // Fused.
         {
-            EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Subtract(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe);
+            }
+            else
+            {
+                EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Fmls_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1106,7 +1319,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmls_Ve(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 
@@ -1155,7 +1372,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmsub_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1190,7 +1411,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmul_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
             }
@@ -1209,12 +1434,23 @@ namespace ARMeilleure.Instructions
 
         public static void Fmul_Se(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe);
+            }
+            else
+            {
+                EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Fmul_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
             }
@@ -1233,7 +1469,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fmul_Ve(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 
@@ -1283,39 +1523,71 @@ namespace ARMeilleure.Instructions
 
         public static void Fmulx_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpF(context, (op1, op2) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
-            });
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS);
+            }
+            else
+            {
+                EmitScalarBinaryOpF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+                });
+            }
         }
 
         public static void Fmulx_Se(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
-            });
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe);
+            }
+            else
+            {
+                EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+                });
+            }
         }
 
         public static void Fmulx_V(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpF(context, (op1, op2) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
-            });
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV);
+            }
+            else
+            {
+                EmitVectorBinaryOpF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+                });
+            }
         }
 
         public static void Fmulx_Ve(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
-            });
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe);
+            }
+            else
+            {
+                EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+                });
+            }
         }
 
         public static void Fneg_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -1344,7 +1616,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fneg_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -1380,7 +1656,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fnmadd_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1423,7 +1703,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fnmsub_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1466,7 +1750,14 @@ namespace ARMeilleure.Instructions
 
         public static void Fnmul_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS);
+            }
+            else
+            {
+                EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+            }
         }
 
         public static void Frecpe_S(ArmEmitterContext context)
@@ -1475,7 +1766,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
                 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true);
 
@@ -1496,7 +1791,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
                 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false);
 
@@ -1518,9 +1817,13 @@ namespace ARMeilleure.Instructions
 
         public static void Frecps_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
             {
-                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
+            {
+                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -1561,7 +1864,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frecps_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1609,15 +1916,26 @@ namespace ARMeilleure.Instructions
 
         public static void Frecpx_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpF(context, (op1) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1);
-            });
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpxS);
+            }
+            else
+            {
+                EmitScalarUnaryOpF(context, (op1) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1);
+                });
+            }
         }
 
         public static void Frinta_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway);
             }
@@ -1632,7 +1950,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frinta_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway);
             }
@@ -1647,23 +1969,41 @@ namespace ARMeilleure.Instructions
 
         public static void Frinti_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpF(context, (op1) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitRoundByRMode(context, op1);
-            });
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS);
+            }
+            else
+            {
+                EmitScalarUnaryOpF(context, (op1) =>
+                {
+                    return EmitRoundByRMode(context, op1);
+                });
+            }
         }
 
         public static void Frinti_V(ArmEmitterContext context)
         {
-            EmitVectorUnaryOpF(context, (op1) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return EmitRoundByRMode(context, op1);
-            });
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV);
+            }
+            else
+            {
+                EmitVectorUnaryOpF(context, (op1) =>
+                {
+                    return EmitRoundByRMode(context, op1);
+                });
+            }
         }
 
         public static void Frintm_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
             }
@@ -1678,7 +2018,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintm_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
             }
@@ -1693,7 +2037,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintn_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest);
             }
@@ -1708,7 +2056,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintnV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest);
             }
@@ -1723,7 +2075,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintp_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
             }
@@ -1738,7 +2094,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
             }
@@ -1753,6 +2113,7 @@ namespace ARMeilleure.Instructions
 
         public static void Frintx_S(ArmEmitterContext context)
         {
+            // TODO Arm64: Fast path. Should we set host FPCR?
             EmitScalarUnaryOpF(context, (op1) =>
             {
                 return EmitRoundByRMode(context, op1);
@@ -1761,6 +2122,7 @@ namespace ARMeilleure.Instructions
 
         public static void Frintx_V(ArmEmitterContext context)
         {
+            // TODO Arm64: Fast path. Should we set host FPCR?
             EmitVectorUnaryOpF(context, (op1) =>
             {
                 return EmitRoundByRMode(context, op1);
@@ -1769,7 +2131,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintz_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero);
             }
@@ -1784,7 +2150,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frintz_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero);
             }
@@ -1803,7 +2173,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
                 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
 
@@ -1824,7 +2198,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
             {
                 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
 
@@ -1846,7 +2224,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frsqrts_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1895,7 +2277,11 @@ namespace ARMeilleure.Instructions
 
         public static void Frsqrts_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -1949,7 +2335,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fsqrt_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
             }
@@ -1964,7 +2354,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fsqrt_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
             }
@@ -1979,7 +2373,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fsub_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
             }
@@ -1998,7 +2396,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fsub_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FsubV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
             }
@@ -2017,7 +2419,11 @@ namespace ARMeilleure.Instructions
 
         public static void Mla_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorMul_AddSub(context, AddSub.Add);
             }
@@ -2032,15 +2438,26 @@ namespace ARMeilleure.Instructions
 
         public static void Mla_Ve(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe);
+            }
+            else
+            {
+                EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Mls_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorMul_AddSub(context, AddSub.Subtract);
             }
@@ -2055,15 +2472,26 @@ namespace ARMeilleure.Instructions
 
         public static void Mls_Ve(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Subtract(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe);
+            }
+            else
+            {
+                EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Mul_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41VectorMul_AddSub(context, AddSub.None);
             }
@@ -2075,17 +2503,35 @@ namespace ARMeilleure.Instructions
 
         public static void Mul_Ve(ArmEmitterContext context)
         {
-            EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe);
+            }
+            else
+            {
+                EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Neg_S(ArmEmitterContext context)
         {
-            EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS);
+            }
+            else
+            {
+                EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
+            }
         }
 
         public static void Neg_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 
@@ -2110,7 +2556,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UsePclmulqdq && op.Size == 3)
+            if (Optimizations.UseAdvSimd && false) // Disabled: the 64-bit PMULL form requires the optional crypto extension, which not all Arm CPUs implement.
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
+            }
+            else if (Optimizations.UsePclmulqdq && op.Size == 3)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
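
The guard in the hunk above hard-codes the AdvSimd path off because polynomial multiply on 64-bit elements belongs to the optional Arm crypto extension rather than baseline AdvSimd. For reference, .NET surfaces that capability through the Aes intrinsic class, so a runtime probe could look like the following (the PmullCapability wrapper is hypothetical; only Aes.IsSupported is an existing API):

    using System.Runtime.Intrinsics.Arm;

    static class PmullCapability
    {
        // PMULL's 64-bit form ships with the AES/crypto extension, which
        // .NET reports through Aes.IsSupported; a flag along these lines
        // could replace the hard-coded "&& false" guard.
        public static bool Available => Aes.IsSupported;
    }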
@@ -2214,33 +2664,65 @@ namespace ARMeilleure.Instructions
 
         public static void Raddhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
+            }
         }
 
         public static void Rsubhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
+            }
         }
 
         public static void Saba_V(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV);
+            }
+            else
+            {
+                EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+                });
+            }
         }
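
The fallback above composes the operation from IR: subtract, absolute value, accumulate. As a lane-level reference (plain C#, not emitter code), SABA on signed byte elements behaves like:

    // SABA lane model for signed byte elements: accumulate |n - m| into
    // the destination; the accumulate wraps rather than saturating.
    static sbyte SignedAbsDiffAccumulate(sbyte acc, sbyte n, sbyte m)
    {
        int diff = n - m;
        return (sbyte)(acc + (diff < 0 ? -diff : diff));
    }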
 
         public static void Sabal_V(ArmEmitterContext context)
         {
-            EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV);
+            }
+            else
+            {
+                EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+                });
+            }
         }
 
         public static void Sabd_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2262,7 +2744,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2293,12 +2779,23 @@ namespace ARMeilleure.Instructions
 
         public static void Sadalp_V(ArmEmitterContext context)
         {
-            EmitAddLongPairwise(context, signed: true, accumulate: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV);
+            }
+            else
+            {
+                EmitAddLongPairwise(context, signed: true, accumulate: true);
+            }
         }
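
SADALP (and its unsigned twin UADALP below) widens each adjacent pair of source elements, sums the pair, and accumulates into the destination lane, which is why the AdvSimd path uses the OpRd helper. A lane-level model on 8-bit sources (illustrative C#):

    // SADALP model for byte lanes: every 16-bit destination lane accumulates
    // the widened sum of two adjacent signed 8-bit source lanes.
    static short[] SignedAddAccumulateLongPairwise(short[] acc, sbyte[] src)
    {
        var result = new short[acc.Length];
        for (int i = 0; i < acc.Length; i++)
        {
            result[i] = (short)(acc[i] + src[2 * i] + src[2 * i + 1]);
        }
        return result;
    }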
 
         public static void Saddl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2328,17 +2825,35 @@ namespace ARMeilleure.Instructions
 
         public static void Saddlp_V(ArmEmitterContext context)
         {
-            EmitAddLongPairwise(context, signed: true, accumulate: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV);
+            }
+            else
+            {
+                EmitAddLongPairwise(context, signed: true, accumulate: false);
+            }
         }
 
         public static void Saddlv_V(ArmEmitterContext context)
         {
-            EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV);
+            }
+            else
+            {
+                EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }
 
         public static void Saddw_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2368,7 +2883,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2404,7 +2923,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV);
+            }
+            else if (Optimizations.UseSse2 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2442,7 +2965,11 @@ namespace ARMeilleure.Instructions
 
         public static void Smax_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2468,7 +2995,11 @@ namespace ARMeilleure.Instructions
 
         public static void Smaxp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction);
             }
@@ -2480,12 +3011,23 @@ namespace ARMeilleure.Instructions
 
         public static void Smaxv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+            }
         }
 
         public static void Smin_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2511,7 +3053,11 @@ namespace ARMeilleure.Instructions
 
         public static void Sminp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PminsInstruction);
             }
@@ -2523,14 +3069,25 @@ namespace ARMeilleure.Instructions
 
         public static void Sminv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+            }
         }
 
         public static void Smlal_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -2566,17 +3123,28 @@ namespace ARMeilleure.Instructions
 
         public static void Smlal_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe);
+            }
+            else
+            {
+                EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Smlsl_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -2612,117 +3180,268 @@ namespace ARMeilleure.Instructions
 
         public static void Smlsl_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Subtract(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe);
+            }
+            else
+            {
+                EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Smull_V(ArmEmitterContext context)
         {
-            EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV);
+            }
+            else
+            {
+                EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Smull_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe);
+            }
+            else
+            {
+                EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
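
The widening multiplies produce a double-width result, hence the WidenRnRm helpers in the portable path; the lane arithmetic itself is just a full-width product. For 16-bit lanes (illustrative C#):

    // SMULL lane model: a signed 16x16 -> 32-bit multiply never overflows,
    // so no saturation or truncation is involved.
    static int SignedMultiplyLong(short a, short b)
    {
        return a * b; // C# widens both operands to int before multiplying.
    }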
 
         public static void Sqabs_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS);
+            }
+            else
+            {
+                EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }
 
         public static void Sqabs_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV);
+            }
+            else
+            {
+                EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+            }
         }
 
         public static void Sqadd_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+            }
         }
 
         public static void Sqadd_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+            }
         }
 
         public static void Sqdmulh_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            }
         }
 
         public static void Sqdmulh_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            }
         }
 
         public static void Sqdmulh_Ve(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+            }
         }
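
EmitDoublingMultiplyHighHalf implements the SQDMULH family: double the product, keep the high half, and saturate the single overflow case; the round flag (used by SQRDMULH below) adds a rounding constant first. A 16-bit lane model (plain C#, unchecked arithmetic assumed):

    // SQDMULH/SQRDMULH lane model for 16-bit elements.
    static short DoublingMultiplyHighHalf(short a, short b, bool round)
    {
        if (a == short.MinValue && b == short.MinValue)
        {
            return short.MaxValue; // 2 * (-2^15)^2 overflows; saturate.
        }
        int product = 2 * a * b + (round ? 1 << 15 : 0);
        return (short)(product >> 16);
    }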
 
         public static void Sqneg_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS);
+            }
+            else
+            {
+                EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+            }
         }
 
         public static void Sqneg_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV);
+            }
+            else
+            {
+                EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+            }
         }
 
         public static void Sqrdmulh_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            }
         }
 
         public static void Sqrdmulh_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            }
         }
 
         public static void Sqrdmulh_Ve(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+            }
         }
 
         public static void Sqsub_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+            }
         }
 
         public static void Sqsub_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+            }
         }
 
         public static void Sqxtn_S(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+            }
         }
 
         public static void Sqxtn_V(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+            }
         }
 
         public static void Sqxtun_S(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunS);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+            }
         }
 
         public static void Sqxtun_V(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+            }
         }
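
The SaturatingNarrowFlags variants differ only in the signedness of source and destination; SQXTUN is the mixed signed-to-unsigned case. Its lane behavior, narrowing 16 bits to 8 (illustrative C#):

    // SQXTUN lane model: clamp a signed 16-bit value into [0, 255]
    // instead of letting the narrowing wrap.
    static byte SaturatingNarrowSignedToUnsigned(short value)
    {
        if (value < byte.MinValue) return byte.MinValue;
        if (value > byte.MaxValue) return byte.MaxValue;
        return (byte)value;
    }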
 
         public static void Srhadd_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV);
+            }
+            else if (Optimizations.UseSse2 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2764,7 +3483,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ssubl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2794,7 +3517,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ssubw_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2822,12 +3549,23 @@ namespace ARMeilleure.Instructions
 
         public static void Sub_S(ArmEmitterContext context)
         {
-            EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS);
+            }
+            else
+            {
+                EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+            }
         }
 
         public static void Sub_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2853,38 +3591,77 @@ namespace ARMeilleure.Instructions
 
         public static void Subhn_V(ArmEmitterContext context)
         {
-            EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV);
+            }
+            else
+            {
+                EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
+            }
         }
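
EmitHighNarrow computes the full-width sum or difference and keeps only the high half of each element, with round adding half of the discarded range first (the R in RADDHN/RSUBHN earlier in this file). For 16-bit sources narrowing to 8 bits (illustrative C#):

    // SUBHN/RSUBHN lane model: subtract, optionally round, then keep the
    // top 8 bits of the 16-bit difference.
    static byte SubtractHighNarrow(short a, short b, bool round)
    {
        int diff = a - b + (round ? 1 << 7 : 0);
        return (byte)(diff >> 8);
    }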
 
         public static void Suqadd_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+            }
         }
 
         public static void Suqadd_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+            }
         }
 
         public static void Uaba_V(ArmEmitterContext context)
         {
-            EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV);
+            }
+            else
+            {
+                EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+                });
+            }
         }
 
         public static void Uabal_V(ArmEmitterContext context)
         {
-            EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV);
+            }
+            else
+            {
+                EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+                });
+            }
         }
 
         public static void Uabd_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2906,7 +3683,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -2937,12 +3718,23 @@ namespace ARMeilleure.Instructions
 
         public static void Uadalp_V(ArmEmitterContext context)
         {
-            EmitAddLongPairwise(context, signed: false, accumulate: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV);
+            }
+            else
+            {
+                EmitAddLongPairwise(context, signed: false, accumulate: true);
+            }
         }
 
         public static void Uaddl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -2972,17 +3764,35 @@ namespace ARMeilleure.Instructions
 
         public static void Uaddlp_V(ArmEmitterContext context)
         {
-            EmitAddLongPairwise(context, signed: false, accumulate: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV);
+            }
+            else
+            {
+                EmitAddLongPairwise(context, signed: false, accumulate: false);
+            }
         }
 
         public static void Uaddlv_V(ArmEmitterContext context)
         {
-            EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV);
+            }
+            else
+            {
+                EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+            }
         }
 
         public static void Uaddw_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -3012,7 +3822,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -3048,7 +3862,11 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV);
+            }
+            else if (Optimizations.UseSse2 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -3079,7 +3897,11 @@ namespace ARMeilleure.Instructions
 
         public static void Umax_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -3105,7 +3927,11 @@ namespace ARMeilleure.Instructions
 
         public static void Umaxp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction);
             }
@@ -3117,12 +3943,23 @@ namespace ARMeilleure.Instructions
 
         public static void Umaxv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+            }
         }
 
         public static void Umin_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -3148,7 +3985,11 @@ namespace ARMeilleure.Instructions
 
         public static void Uminp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitSsse3VectorPairwiseOp(context, X86PminuInstruction);
             }
@@ -3160,14 +4001,25 @@ namespace ARMeilleure.Instructions
 
         public static void Uminv_V(ArmEmitterContext context)
         {
-            EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV);
+            }
+            else
+            {
+                EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+            }
         }
 
         public static void Umlal_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -3203,17 +4055,28 @@ namespace ARMeilleure.Instructions
 
         public static void Umlal_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Add(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe);
+            }
+            else
+            {
+                EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+                {
+                    return context.Add(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Umlsl_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse41 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV);
+            }
+            else if (Optimizations.UseSse41 && op.Size < 2)
             {
                 Operand d = GetVec(op.Rd);
                 Operand n = GetVec(op.Rn);
@@ -3249,57 +4112,124 @@ namespace ARMeilleure.Instructions
 
         public static void Umlsl_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+            if (Optimizations.UseAdvSimd)
             {
-                return context.Subtract(op1, context.Multiply(op2, op3));
-            });
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe);
+            }
+            else
+            {
+                EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
         }
 
         public static void Umull_V(ArmEmitterContext context)
         {
-            EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV);
+            }
+            else
+            {
+                EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Umull_Ve(ArmEmitterContext context)
         {
-            EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe);
+            }
+            else
+            {
+                EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+            }
         }
 
         public static void Uqadd_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+            }
         }
 
         public static void Uqadd_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+            }
         }
 
         public static void Uqsub_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+            }
         }
 
         public static void Uqsub_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+            }
         }
 
         public static void Uqxtn_S(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+            }
         }
 
         public static void Uqxtn_V(ArmEmitterContext context)
         {
-            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV);
+            }
+            else
+            {
+                EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+            }
         }
 
         public static void Urhadd_V(ArmEmitterContext context)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size < 2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV);
+            }
+            else if (Optimizations.UseSse2 && op.Size < 2)
             {
                 Operand n = GetVec(op.Rn);
                 Operand m = GetVec(op.Rm);
@@ -3330,17 +4260,35 @@ namespace ARMeilleure.Instructions
 
         public static void Usqadd_S(ArmEmitterContext context)
         {
-            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS);
+            }
+            else
+            {
+                EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+            }
         }
 
         public static void Usqadd_V(ArmEmitterContext context)
         {
-            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV);
+            }
+            else
+            {
+                EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+            }
         }
 
         public static void Usubl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -3370,7 +4318,11 @@ namespace ARMeilleure.Instructions
 
         public static void Usubw_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index 79b376e9..a9994e41 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -2,6 +2,7 @@
 using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;
 using System;
+
 using static ARMeilleure.Instructions.InstEmitFlowHelper;
 using static ARMeilleure.Instructions.InstEmitHelper;
 using static ARMeilleure.Instructions.InstEmitSimdHelper;
@@ -30,7 +31,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
 
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpSimd32(context, (m) =>
                 {
@@ -49,7 +54,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitVectorUnaryOpSimd32(context, (m) =>
                     {
@@ -76,7 +85,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vadd_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
             }
@@ -92,7 +105,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vadd_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
             }
@@ -280,7 +297,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfma_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
             }
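
Vfma_S above is fused: the product feeds the add at full precision with a single rounding step, which both the Arm64 FMADD path and the x86 VFMADD231 path preserve. The unfused fallback rounds twice, a contrast visible at lane level (plain C#; MathF.FusedMultiplyAdd is the standard .NET API):

    // Fused vs. unfused multiply-add on a float lane: the fused form rounds
    // once, the two-instruction form rounds after the multiply and again
    // after the add.
    static float Fused(float d, float n, float m)   => MathF.FusedMultiplyAdd(n, m, d);
    static float Unfused(float d, float n, float m) => d + n * m;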
@@ -299,7 +320,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfma_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
             }
@@ -314,7 +339,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfms_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
             }
@@ -333,7 +362,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfms_V(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
             }
@@ -348,7 +381,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfnma_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
             }
@@ -367,7 +404,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vfnms_S(ArmEmitterContext context) // Fused.
         {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
             }
@@ -419,7 +460,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
 
-            if (Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpSimd32(context, (m) =>
                 {
@@ -445,7 +490,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
 
-            if (Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpSimd32(context, (n, m) =>
                 {
@@ -473,7 +522,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
 
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
             }
@@ -498,7 +551,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
 
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
             }
@@ -525,7 +582,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitVectorUnaryOpSimd32(context, (m) =>
                     {
@@ -554,7 +615,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vdiv_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
             }
@@ -573,7 +638,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmaxnm_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF32(context, true, true);
             }
@@ -585,7 +654,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmaxnm_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF32(context, true, false);
             }
@@ -597,7 +670,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vminnm_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF32(context, false, true);
             }
@@ -609,7 +686,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vminnm_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
             {
                 EmitSse41MaxMinNumOpF32(context, false, false);
             }
@@ -621,7 +702,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmax_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
             }
@@ -664,7 +749,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmin_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
             }
@@ -707,7 +796,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmla_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
             }
@@ -730,7 +823,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmla_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
             }
@@ -786,7 +883,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmls_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
             }
@@ -809,7 +910,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmls_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
             }
@@ -865,7 +970,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmul_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
             }
@@ -884,7 +993,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vmul_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
             }
@@ -975,7 +1088,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vpadd_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps);
             }
@@ -1008,7 +1125,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vpmax_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps);
             }
@@ -1038,7 +1159,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vpmin_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps);
             }
@@ -1217,7 +1342,11 @@ namespace ARMeilleure.Instructions
             {
                 int sizeF = op.Size & 1;
 
-                if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
                 {
                     EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0);
                 }
@@ -1237,7 +1366,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vrecps(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                 bool single = (op.Size & 1) == 0;
@@ -1304,7 +1437,11 @@ namespace ARMeilleure.Instructions
             {
                 int sizeF = op.Size & 1;
 
-                if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
                 {
                     EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0);
                 }
@@ -1324,7 +1461,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vrsqrts(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                 bool single = (op.Size & 1) == 0;
@@ -1393,7 +1534,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vsqrt_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
             }
@@ -1408,7 +1553,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vsub_S(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
             }
@@ -1420,7 +1569,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vsub_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
             }
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
index 71055155..c32b64ba 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCmp.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -466,12 +466,26 @@ namespace ARMeilleure.Instructions
 
         public static void Fcmp_S(ArmEmitterContext context)
         {
-            EmitFcmpOrFcmpe(context, signalNaNs: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false);
+            }
+            else
+            {
+                EmitFcmpOrFcmpe(context, signalNaNs: false);
+            }
         }
 
         public static void Fcmpe_S(ArmEmitterContext context)
         {
-            EmitFcmpOrFcmpe(context, signalNaNs: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true);
+            }
+            else
+            {
+                EmitFcmpOrFcmpe(context, signalNaNs: true);
+            }
         }
 
         private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
index 339d3293..a990e057 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
@@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
     {
         public static void Vceq_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
             {
                 EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false);
             }
@@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true);
                 }
@@ -55,7 +63,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vcge_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseAvx)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseAvx)
             {
                 EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
             }
@@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseAvx)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseAvx)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
                 }
@@ -95,7 +111,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vcgt_V(ArmEmitterContext context)
         {
-            if (Optimizations.FastFP && Optimizations.UseAvx)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseAvx)
             {
                 EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false);
             }
@@ -118,7 +138,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseAvx)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseAvx)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true);
                 }
@@ -139,7 +163,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
                 }
@@ -160,7 +188,11 @@ namespace ARMeilleure.Instructions
 
             if (op.F)
             {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                 {
                     EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true);
                 }
@@ -247,12 +279,26 @@ namespace ARMeilleure.Instructions
 
         public static void Vcmp(ArmEmitterContext context)
         {
-            EmitVcmpOrVcmpe(context, false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false);
+            }
+            else
+            {
+                EmitVcmpOrVcmpe(context, false);
+            }
         }
 
         public static void Vcmpe(ArmEmitterContext context)
         {
-            EmitVcmpOrVcmpe(context, true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true);
+            }
+            else
+            {
+                EmitVcmpOrVcmpe(context, true);
+            }
         }
 
         private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
index 7f61cad4..652ad397 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -164,7 +164,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtas_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
             }
@@ -176,7 +180,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtas_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
             }
@@ -188,7 +196,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtas_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
             }
@@ -200,7 +212,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtau_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
             }
@@ -212,7 +228,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtau_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
             }
@@ -224,7 +244,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtau_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
             }
@@ -240,7 +264,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.UseSse2 && sizeF == 1)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV);
+            }
+            else if (Optimizations.UseSse2 && sizeF == 1)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -296,7 +324,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtms_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
             }
@@ -308,7 +340,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtms_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
             }
@@ -320,7 +356,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtmu_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
             }
@@ -336,7 +376,11 @@ namespace ARMeilleure.Instructions
 
             int sizeF = op.Size & 1;
 
-            if (Optimizations.UseSse2 && sizeF == 1)
+            if (Optimizations.UseAdvSimd)
+            {
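+                // FCVTN{2} only writes half of Rd, so the helper variant that feeds Rd back in as a source is used.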
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV);
+            }
+            else if (Optimizations.UseSse2 && sizeF == 1)
             {
                 Operand d = GetVec(op.Rd);
 
@@ -405,7 +449,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtns_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
             }
@@ -417,7 +465,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtns_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
             }
@@ -429,7 +481,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtns_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
             }
@@ -441,7 +497,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtnu_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
             }
@@ -453,7 +513,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtnu_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
             }
@@ -465,7 +529,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtps_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
             }
@@ -477,7 +545,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtpu_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
             }
@@ -489,7 +561,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
             }
@@ -501,7 +577,13 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
             }
@@ -513,7 +595,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
             }
@@ -525,7 +611,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
             }
@@ -537,7 +627,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
             }
@@ -549,7 +643,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_Gp(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
             }
@@ -561,7 +659,13 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
             }
@@ -573,7 +677,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
             }
@@ -585,7 +693,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
             }
@@ -597,7 +709,11 @@ namespace ARMeilleure.Instructions
 
         public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
             }
@@ -609,41 +725,59 @@ namespace ARMeilleure.Instructions
 
         public static void Scvtf_Gp(ArmEmitterContext context)
         {
-            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
-
-            Operand res = GetIntOrZR(context, op.Rn);
-
-            if (op.RegisterSize == RegisterSize.Int32)
+            if (Optimizations.UseAdvSimd)
             {
-                res = context.SignExtend32(OperandType.I64, res);
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp);
             }
+            else
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+                Operand res = GetIntOrZR(context, op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Int32)
+                {
+                    res = context.SignExtend32(OperandType.I64, res);
+                }
 
-            res = EmitFPConvert(context, res, op.Size, signed: true);
+                res = EmitFPConvert(context, res, op.Size, signed: true);
 
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
         }
 
         public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
         {
             OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 
-            Operand res = GetIntOrZR(context, op.Rn);
-
-            if (op.RegisterSize == RegisterSize.Int32)
+            if (Optimizations.UseAdvSimd)
             {
-                res = context.SignExtend32(OperandType.I64, res);
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits);
             }
+            else
+            {
+                Operand res = GetIntOrZR(context, op.Rn);
 
-            res = EmitFPConvert(context, res, op.Size, signed: true);
+                if (op.RegisterSize == RegisterSize.Int32)
+                {
+                    res = context.SignExtend32(OperandType.I64, res);
+                }
 
-            res = EmitI2fFBitsMul(context, res, op.FBits);
+                res = EmitFPConvert(context, res, op.Size, signed: true);
 
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                res = EmitI2fFBitsMul(context, res, op.FBits);
+
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
         }
 
         public static void Scvtf_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2ScvtfOp(context, scalar: true);
             }
@@ -655,7 +789,11 @@ namespace ARMeilleure.Instructions
 
         public static void Scvtf_S_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2ScvtfOp(context, scalar: true);
             }
@@ -667,7 +805,11 @@ namespace ARMeilleure.Instructions
 
         public static void Scvtf_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2ScvtfOp(context, scalar: false);
             }
@@ -679,7 +821,11 @@ namespace ARMeilleure.Instructions
 
         public static void Scvtf_V_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2ScvtfOp(context, scalar: false);
             }
@@ -691,31 +837,49 @@ namespace ARMeilleure.Instructions
 
         public static void Ucvtf_Gp(ArmEmitterContext context)
         {
-            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp);
+            }
+            else
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 
-            Operand res = GetIntOrZR(context, op.Rn);
+                Operand res = GetIntOrZR(context, op.Rn);
 
-            res = EmitFPConvert(context, res, op.Size, signed: false);
+                res = EmitFPConvert(context, res, op.Size, signed: false);
 
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
         }
 
         public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
         {
             OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 
-            Operand res = GetIntOrZR(context, op.Rn);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits);
+            }
+            else
+            {
+                Operand res = GetIntOrZR(context, op.Rn);
 
-            res = EmitFPConvert(context, res, op.Size, signed: false);
+                res = EmitFPConvert(context, res, op.Size, signed: false);
 
-            res = EmitI2fFBitsMul(context, res, op.FBits);
+                res = EmitI2fFBitsMul(context, res, op.FBits);
 
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
         }
 
         public static void Ucvtf_S(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2UcvtfOp(context, scalar: true);
             }
@@ -727,7 +891,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ucvtf_S_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2UcvtfOp(context, scalar: true);
             }
@@ -739,7 +907,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ucvtf_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2UcvtfOp(context, scalar: false);
             }
@@ -751,7 +923,11 @@ namespace ARMeilleure.Instructions
 
         public static void Ucvtf_V_Fixed(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitSse2UcvtfOp(context, scalar: false);
             }
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
index f3f23958..5fdc3b5a 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -59,7 +59,11 @@ namespace ARMeilleure.Instructions
 
             if (toInteger)
             {
-                if (Optimizations.UseSse41)
+                if (Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
+                }
+                else if (Optimizations.UseSse41)
                 {
                     EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
                 }
@@ -153,7 +157,28 @@ namespace ARMeilleure.Instructions
                 bool unsigned = (op.Opc2 & 1) == 0;
                 bool roundWithFpscr = op.Opc != 1;
 
-                if (!roundWithFpscr && Optimizations.UseSse41)
+                if (!roundWithFpscr && Optimizations.UseAdvSimd)
+                {
+                    bool doubleSize = floatSize == OperandType.FP64;
+
+                    if (doubleSize)
+                    {
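+                        // The source is a 64-bit scalar but the result is a 32-bit integer, so
+                        // convert through a general-purpose register and insert the result back.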
+                        Operand m = GetVecA32(op.Vm >> 1);
+
+                        Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+                        Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
+
+                        Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
+
+                        InsertScalar(context, op.Vd, asInteger);
+                    }
+                    else
+                    {
+                        InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS);
+                    }
+                }
+                else if (!roundWithFpscr && Optimizations.UseSse41)
                 {
                     EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
                 }
@@ -231,7 +256,34 @@ namespace ARMeilleure.Instructions
             bool unsigned = op.Opc == 0;
             int rm = op.Opc2 & 3;
 
-            if (Optimizations.UseSse41)
+            Intrinsic inst;
+
+            if (Optimizations.UseAdvSimd)
+            {
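+                // rm selects the rounding mode: 0b00 = to nearest, ties away (A), 0b01 = to nearest, ties even (N),
+                // 0b10 = towards +infinity (P), 0b11 = towards -infinity (M).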
+                if (unsigned)
+                {
+                    inst = rm switch
+                    {
+                        0b00 => Intrinsic.Arm64FcvtauS,
+                        0b01 => Intrinsic.Arm64FcvtnuS,
+                        0b10 => Intrinsic.Arm64FcvtpuS,
+                        0b11 => Intrinsic.Arm64FcvtmuS,
+                        _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                    };
+                }
+                else
+                {
+                    inst = rm switch
+                    {
+                        0b00 => Intrinsic.Arm64FcvtasS,
+                        0b01 => Intrinsic.Arm64FcvtnsS,
+                        0b10 => Intrinsic.Arm64FcvtpsS,
+                        0b11 => Intrinsic.Arm64FcvtmsS,
+                        _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                    };
+                }
+
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
             }
@@ -338,7 +390,19 @@ namespace ARMeilleure.Instructions
 
             int rm = op.Opc2 & 3;
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
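+                // rm maps to the FRINT variant with the same A/N/P/M encoding as the conversions above.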
+                Intrinsic inst = rm switch
+                {
+                    0b00 => Intrinsic.Arm64FrintaS,
+                    0b01 => Intrinsic.Arm64FrintnS,
+                    0b10 => Intrinsic.Arm64FrintpS,
+                    0b11 => Intrinsic.Arm64FrintmS,
+                    _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                };
+
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+            }
+            else if (Optimizations.UseSse41)
             {
                 EmitScalarUnaryOpSimd32(context, (m) =>
                 {
@@ -382,12 +446,9 @@ namespace ARMeilleure.Instructions
         // VRINTA (vector).
         public static void Vrinta_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
             {
-                EmitVectorUnaryOpSimd32(context, (m) =>
-                {
-                    return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false);
-                });
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
             }
             else
             {
@@ -398,7 +459,11 @@ namespace ARMeilleure.Instructions
         // VRINTM (vector).
         public static void Vrintm_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorUnaryOpSimd32(context, (m) =>
                 {
@@ -414,7 +479,11 @@ namespace ARMeilleure.Instructions
         // VRINTN (vector).
         public static void Vrintn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorUnaryOpSimd32(context, (m) =>
                 {
@@ -430,7 +499,11 @@ namespace ARMeilleure.Instructions
         // VRINTP (vector).
         public static void Vrintp_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorUnaryOpSimd32(context, (m) =>
                 {
@@ -448,7 +521,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
 
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitScalarUnaryOpSimd32(context, (m) =>
                 {
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
new file mode 100644
index 00000000..98236be6
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
@@ -0,0 +1,366 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+    using Func1I = Func<Operand, Operand>;
+    using Func2I = Func<Operand, Operand, Operand>;
+    using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+    static class InstEmitSimdHelper32Arm64
+    {
+        // Intrinsic Helpers
+
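+        //
+        // AArch32 Advanced SIMD aliases each Q register onto a pair of D registers
+        // (Q0 = D0:D1, Q1 = D2:D3, ...), while on AArch64 Dn is only the low half of Vn.
+        // These helpers therefore move, extract and insert doublewords explicitly whenever
+        // an A32 instruction addresses the high half of a Q register.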
+        public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+        {
+            Debug.Assert(input.Type == OperandType.V128);
+
+            int originalSide = originalV & 1;
+            int targetSide = targetV & 1;
+
+            if (originalSide == targetSide)
+            {
+                return input;
+            }
+
+            Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128;
+
+            if (targetSide == 1)
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high.
+            }
+            else
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low.
+            }
+        }
+
+        public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+        {
+            Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+            int targetSide = targetV & 1;
+            Operand idx = Const(targetSide);
+
+            return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx);
+        }
+
+        public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+        {
+            Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+            // Insert from index 0 in value to index in target.
+            int index = reg & (doubleWidth ? 1 : 3);
+
+            if (doubleWidth)
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0));
+            }
+            else
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0));
+            }
+        }
+
+        public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+        {
+            int index = reg & (doubleWidth ? 1 : 3);
+
+            if (index == 0)
+            {
+                // Element is already at index 0, so just return the vector directly.
+                return target;
+            }
+
+            if (doubleWidth)
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1).
+            }
+            else
+            {
+                return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index.
+            }
+        }
+
+        // Vector Operand Templates
+
+        public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            Operand m = GetVecA32(op.Qm);
+            Operand d = GetVecA32(op.Qd);
+
+            if (!op.Q) // Register swap: move relevant doubleword to destination side.
+            {
+                m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+            }
+
+            Operand res = vectorFunc(m);
+
+            if (!op.Q) // Register insert.
+            {
+                res = EmitDoubleWordInsert(context, d, res, op.Vd);
+            }
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+        }
+
+        public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            Operand n = GetVecA32(op.Qn);
+            Operand m = GetVecA32(op.Qm);
+            Operand d = GetVecA32(op.Qd);
+
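+            // side selects which doubleword the operation targets: -1 follows the
+            // destination (Vd); pairwise ops pass 0 to force the low half.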
+            if (side == -1)
+            {
+                side = op.Vd;
+            }
+
+            if (!op.Q) // Register swap: move relevant doubleword to destination side.
+            {
+                n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
+                m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
+            }
+
+            Operand res = vectorFunc(n, m);
+
+            if (!op.Q) // Register insert.
+            {
+                if (side != op.Vd)
+                {
+                    res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
+                }
+                res = EmitDoubleWordInsert(context, d, res, op.Vd);
+            }
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            Operand n = GetVecA32(op.Qn);
+            Operand m = GetVecA32(op.Qm);
+            Operand d = GetVecA32(op.Qd);
+            Operand initialD = d;
+
+            if (!op.Q) // Register swap: move relevant doubleword to destination side.
+            {
+                n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+                m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+            }
+
+            Operand res = vectorFunc(d, n, m);
+
+            if (!op.Q) // Register insert.
+            {
+                res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+            }
+
+            context.Copy(initialD, res);
+        }
+
+        public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+        }
+
+        public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
+        {
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+            bool doubleSize = (op.Size & 1) != 0;
+            int shift = doubleSize ? 1 : 2;
+            Operand m = GetVecA32(op.Vm >> shift);
+            Operand d = GetVecA32(op.Vd >> shift);
+
+            m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+            Operand res = scalarFunc(m);
+
+            // Insert scalar into vector.
+            res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
+        }
+
+        public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            bool doubleSize = (op.Size & 1) != 0;
+            int shift = doubleSize ? 1 : 2;
+            Operand n = GetVecA32(op.Vn >> shift);
+            Operand m = GetVecA32(op.Vm >> shift);
+            Operand d = GetVecA32(op.Vd >> shift);
+
+            n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+            m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+            Operand res = scalarFunc(n, m);
+
+            // Insert scalar into vector.
+            res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+            context.Copy(d, res);
+        }
+
+        public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            bool doubleSize = (op.Size & 1) != 0;
+            int shift = doubleSize ? 1 : 2;
+            Operand n = GetVecA32(op.Vn >> shift);
+            Operand m = GetVecA32(op.Vm >> shift);
+            Operand d = GetVecA32(op.Vd >> shift);
+            Operand initialD = d;
+
+            n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+            m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+            d = EmitExtractScalar(context, d, op.Vd, doubleSize);
+
+            Operand res = scalarFunc(d, n, m);
+
+            // Insert scalar into vector.
+            res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
+
+            context.Copy(initialD, res);
+        }
+
+        public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+            inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+            EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+        }
+
+        // Pairwise
+
+        public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat;
+            EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0);
+        }
+
+        public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
+        {
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+            bool cmpWithZero = (op.Opc & 2) != 0;
+
+            bool doubleSize = (op.Size & 1) != 0;
+
+            Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+            inst |= (doubleSize ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+            int shift = doubleSize ? 1 : 2;
+            Operand n = GetVecA32(op.Vd >> shift); // VCMP compares Vd against Vm (or zero), so the first operand comes from Vd.
+            Operand m = GetVecA32(op.Vm >> shift);
+
+            n = EmitExtractScalar(context, n, op.Vd, doubleSize);
+            m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+            Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
+            Operand one = Const(1);
+
+            SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+            SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+            SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+            SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+        }
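
The compare intrinsic hands back the AArch64 NZCV value as an integer, with N in bit 31 down through V in bit 28, which is what the shift-and-mask sequence above unpacks into the guest flags. The same bit twiddling in plain C#, operating on a raw value rather than IR operands (illustration only):

    // Sketch only: unpack an NZCV word as returned by FCMP/FCMPE.
    static (bool N, bool Z, bool C, bool V) UnpackNzcv(uint nzcv)
    {
        return (((nzcv >> 31) & 1) != 0,  // N: negative
                ((nzcv >> 30) & 1) != 0,  // Z: zero
                ((nzcv >> 29) & 1) != 0,  // C: carry
                ((nzcv >> 28) & 1) != 0); // V: overflow
    }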
+
+        public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+
+            Intrinsic inst;
+            if (zero)
+            {
+                inst = cond switch
+                {
+                    CmpCondition.Equal => Intrinsic.Arm64FcmeqVz,
+                    CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz,
+                    CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz,
+                    CmpCondition.LessThan => Intrinsic.Arm64FcmltVz,
+                    CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz,
+                    _ => throw new InvalidOperationException()
+                };
+            }
+            else
+            {
+                inst = cond switch
+                {
+                    CmpCondition.Equal => Intrinsic.Arm64FcmeqV,
+                    CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV,
+                    CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV,
+                    _ => throw new InvalidOperationException()
+                };
+            }
+
+            inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+            if (zero)
+            {
+                EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+            }
+            else
+            {
+                EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+            }
+        }
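
The register-to-register arm only maps Equal, GreaterThan and GreaterThanOrEqual because AArch64 provides FCMLT/FCMLE solely against zero; a two-register less-than is expressed by swapping the operands of the greater-than compare. A sketch of that rewriting using the PR's own types (hypothetical caller-side logic, not code from this change):

    // Sketch only: fcmgt(m, n) sets each lane where m > n, i.e. where n < m.
    static Operand EmitLessThan(ArmEmitterContext context, Intrinsic fcmgt, Operand n, Operand m)
    {
        return context.AddIntrinsic(fcmgt, m, n);
    }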
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs b/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
new file mode 100644
index 00000000..f0d242ae
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
@@ -0,0 +1,720 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+    static class InstEmitSimdHelperArm64
+    {
+        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+                ? context.AddIntrinsicInt (inst, n)
+                : context.AddIntrinsicLong(inst, n));
+        }
+
+        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+        }
+
+        public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+            Operand a = GetVec(op.Ra);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m));
+        }
+
+        public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+        }
+
+        public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
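
Integer intrinsics carry their element size in dedicated bits of the Intrinsic enum: `op.Size` (0 = byte, 1 = halfword, 2 = word, 3 = doubleword) is shifted into place by Arm64VSizeShift and OR'd with the opcode, just as Arm64V128 selects the 128-bit form. A small model of the packing, with a made-up shift value standing in for the real enum layout:

    // Sketch only: pack an element size into an instruction flag word.
    const int VSizeShift = 20; // Assumed position; the real constant lives in the Intrinsic enum.

    static uint WithSize(uint inst, int size)
    {
        return inst | (uint)(size << VSizeShift); // size = 0..3 selects B/H/S/D elements.
    }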
+
+        public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+        }
+
+        public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+        }
+
+        public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+        }
+
+        public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+        }
+
+        public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
+            context.SetPendingQcFlagSync();
+        }
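
Saturating helpers finish with context.SetPendingQcFlagSync() rather than reading the flag immediately: the host's cumulative QC (saturation) bit is left in FPSR and only merged into the guest state when it is actually observed or before a call, which is what the "Sync Qc flag before calls" item in this PR's summary refers to. A rough model of that deferred merge, with hypothetical types that are not ARMeilleure's:

    // Sketch only: defer the FPSR read until the QC flag is actually needed.
    class QcFlagTracker
    {
        private bool _syncPending;

        public void MarkPending() => _syncPending = true; // After each saturating op.

        public bool ReadQc(System.Func<bool> readHostQc, ref bool guestQc)
        {
            if (_syncPending)
            {
                guestQc |= readHostQc(); // QC is cumulative: OR, never overwrite.
                _syncPending = false;
            }
            return guestQc;
        }
    }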
+
+        public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            Operand result = context.AddIntrinsic(inst, n);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            Operand result = context.AddIntrinsic(inst, n, m);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            Operand result = context.AddIntrinsic(inst, d, n);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+        }
+
+        public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+        }
+
+        public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+                ? context.AddIntrinsicInt (inst, n, Const(fBits))
+                : context.AddIntrinsicLong(inst, n, Const(fBits)));
+        }
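
The fBits constant selects a fixed-point format for the conversion: the value is scaled by 2^fBits on the way to an integer, and by 2^-fBits on the way back. In plain C#, the float-to-fixed direction looks roughly like this (rounding toward zero as FCVTZS does; saturation omitted for brevity):

    // Sketch only: float -> signed fixed-point with fBits fractional bits.
    static long ToFixed(double value, int fBits)
    {
        return (long)(value * (1L << fBits)); // The cast truncates toward zero.
    }

    // Example: ToFixed(1.5, 8) == 384, i.e. 1.5 * 256.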
+
+        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+        }
+
+        public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+        }
+
+        public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+        }
+
+        public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+        }
+
+        public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+        }
+
+        public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+        }
+
+        public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+        }
+
+        public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+        }
+
+        public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+        }
+
+        public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+        }
+
+        public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+        }
+
+        public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+        }
+
+        public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            Operand result = context.AddIntrinsic(inst, n);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            Operand result = context.AddIntrinsic(inst, n, m);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            Operand result = context.AddIntrinsic(inst, d, n);
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index));
+
+            context.Copy(GetVec(op.Rd), result);
+
+            context.SetPendingQcFlagSync();
+        }
+
+        public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+        }
+
+        public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst)
+        {
+            OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+            Operand[] operands = new Operand[op.Size + 1];
+
+            operands[op.Size] = GetVec(op.Rm);
+
+            for (int index = 0; index < op.Size; index++)
+            {
+                operands[index] = GetVec((op.Rn + index) & 0x1F);
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                inst |= Intrinsic.Arm64V128;
+            }
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands));
+        }
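
TBL consumes a list of up to four consecutive table registers starting at Rn, and the `(op.Rn + index) & 0x1F` above wraps that list around the 32-entry SIMD register file, so a table starting at V30 continues at V0. The wrap in isolation:

    // Sketch only: consecutive TBL table registers wrap modulo 32.
    static int[] TableRegisters(int rn, int count)
    {
        int[] regs = new int[count];
        for (int index = 0; index < count; index++)
        {
            regs[index] = (rn + index) & 0x1F; // rn = 30, count = 4 -> V30, V31, V0, V1.
        }
        return regs;
    }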
+
+        public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool cmpWithZero = !(op is OpCodeSimdFcond) && op.Bit3;
+
+            Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+
+            if ((op.Size & 1) != 0)
+            {
+                inst |= Intrinsic.Arm64VDouble;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = cmpWithZero ? Const(0) : GetVec(op.Rm);
+
+            Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
+            Operand one = Const(1);
+
+            SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+            SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+            SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+            SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
index 624ae841..8ca81580 100644
--- a/ARMeilleure/Instructions/InstEmitSimdLogical.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -14,7 +14,11 @@ namespace ARMeilleure.Instructions
     {
         public static void And_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
 
         public static void Bic_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -98,12 +106,26 @@ namespace ARMeilleure.Instructions
 
         public static void Bif_V(ArmEmitterContext context)
         {
-            EmitBifBit(context, notRm: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
+            }
+            else
+            {
+                EmitBifBit(context, notRm: true);
+            }
         }
 
         public static void Bit_V(ArmEmitterContext context)
         {
-            EmitBifBit(context, notRm: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
+            }
+            else
+            {
+                EmitBifBit(context, notRm: false);
+            }
         }
 
         private static void EmitBifBit(ArmEmitterContext context, bool notRm)
@@ -167,7 +189,11 @@ namespace ARMeilleure.Instructions
 
         public static void Bsl_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -200,7 +226,11 @@ namespace ARMeilleure.Instructions
 
         public static void Eor_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -249,7 +279,11 @@ namespace ARMeilleure.Instructions
 
         public static void Orn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
@@ -280,7 +314,11 @@ namespace ARMeilleure.Instructions
 
         public static void Orr_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
+            }
+            else if (Optimizations.UseSse2)
             {
                 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
index dd686d4d..c2a04778 100644
--- a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
@@ -13,7 +13,11 @@ namespace ARMeilleure.Instructions
     {
         public static void Vand_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m));
             }
@@ -25,7 +29,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vbic_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n));
             }
@@ -73,17 +81,35 @@ namespace ARMeilleure.Instructions
 
         public static void Vbif(ArmEmitterContext context)
         {
-            EmitBifBit(context, true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m));
+            }
+            else
+            {
+                EmitBifBit(context, true);
+            }
         }
 
         public static void Vbit(ArmEmitterContext context)
         {
-            EmitBifBit(context, false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m));
+            }
+            else
+            {
+                EmitBifBit(context, false);
+            }
         }
 
         public static void Vbsl(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorTernaryOpSimd32(context, (d, n, m) =>
                 {
@@ -105,7 +131,11 @@ namespace ARMeilleure.Instructions
 
         public static void Veor_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m));
             }
@@ -117,7 +147,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vorn_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 Operand mask = context.VectorOne();
 
@@ -135,7 +169,11 @@ namespace ARMeilleure.Instructions
 
         public static void Vorr_I(ArmEmitterContext context)
         {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m));
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m));
             }
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
index 7da180fc..17100eb9 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -392,7 +392,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
 
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V);
+            }
+            else if (Optimizations.UseSse2)
             {
                 EmitVectorShuffleOpSimd32(context, (m, d) =>
                 {
@@ -461,7 +465,11 @@ namespace ARMeilleure.Instructions
         {
             OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
 
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 EmitVectorShuffleOpSimd32(context, (m, d) =>
                 {
@@ -559,6 +567,52 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2)
+        {
+            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+            bool overlap = op.Qm == op.Qd;
+
+            Operand d = GetVecA32(op.Qd);
+            Operand m = GetVecA32(op.Qm);
+
+            Operand dPart = d;
+            Operand mPart = m;
+
+            if (!op.Q) // Register swap: move relevant doubleword to destination side.
+            {
+                dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+                mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+            }
+
+            Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64;
+
+            vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+            Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart);
+            Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart);
+
+            if (!op.Q) // Register insert.
+            {
+                resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0));
+
+                if (overlap)
+                {
+                    resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0));
+                }
+                else
+                {
+                    resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0));
+                }
+            }
+
+            context.Copy(d, resD);
+            if (!overlap)
+            {
+                context.Copy(m, resM);
+            }
+        }
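
As a refresher on the semantics being lowered here: ZIP1/ZIP2 interleave the low and high halves of two vectors, while UZP1/UZP2 de-interleave even and odd elements; the helper computes both results and, in the D-register case, re-inserts each doubleword into its destination. A scalar model of the zip pair, with arrays standing in for vector lanes (illustration only):

    // Sketch only: ZIP1/ZIP2 semantics over lane arrays.
    static (int[] Zip1, int[] Zip2) Zip(int[] n, int[] m)
    {
        int half = n.Length / 2;
        int[] zip1 = new int[n.Length];
        int[] zip2 = new int[n.Length];

        for (int i = 0; i < half; i++)
        {
            zip1[2 * i]     = n[i];        // Low halves, interleaved.
            zip1[2 * i + 1] = m[i];
            zip2[2 * i]     = n[half + i]; // High halves, interleaved.
            zip2[2 * i + 1] = m[half + i];
        }

        return (zip1, zip2);
    }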
+
         private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
         {
             OpCode32Simd op = (OpCode32Simd)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs
index cf3b51bd..19e41119 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -26,7 +26,15 @@ namespace ARMeilleure.Instructions
 
         public static void Rshrn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
@@ -80,7 +88,14 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShl(op);
 
-            EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift);
+            }
+            else
+            {
+                EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+            }
         }
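
GetImmShl and GetImmShr (helpers defined elsewhere in ARMeilleure) recover the shift amount from the instruction's immh:immb field, where left shifts are encoded as esize + n and right shifts as 2 * esize - n. A sketch of that decode, assumed to follow the Arm ARM rather than copied from the helpers themselves:

    // Sketch only: immediate shift decode for esize = 8 << size.
    static int DecodeShl(int imm, int size) => imm - (8 << size);        // SHL #n  <- esize + n
    static int DecodeShr(int imm, int size) => (8 << (size + 1)) - imm;  // SSHR #n <- 2 * esize - n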
 
         public static void Shl_V(ArmEmitterContext context)
@@ -90,7 +105,11 @@ namespace ARMeilleure.Instructions
             int shift = GetImmShl(op);
             int eSize = 8 << op.Size;
 
-            if (shift >= eSize)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift);
+            }
+            else if (shift >= eSize)
             {
                 if ((op.RegisterSize == RegisterSize.Simd64))
                 {
@@ -143,7 +162,11 @@ namespace ARMeilleure.Instructions
 
             int shift = 8 << op.Size;
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV);
+            }
+            else if (Optimizations.UseSse41)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -170,7 +193,15 @@ namespace ARMeilleure.Instructions
 
         public static void Shrn_V(ArmEmitterContext context)
         {
-            if (Optimizations.UseSsse3)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift);
+            }
+            else if (Optimizations.UseSsse3)
             {
                 OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
@@ -205,89 +236,259 @@ namespace ARMeilleure.Instructions
 
         public static void Sli_S(ArmEmitterContext context)
         {
-            EmitSli(context, scalar: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShl(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift);
+            }
+            else
+            {
+                EmitSli(context, scalar: true);
+            }
         }
 
         public static void Sli_V(ArmEmitterContext context)
         {
-            EmitSli(context, scalar: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShl(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift);
+            }
+            else
+            {
+                EmitSli(context, scalar: false);
+            }
         }
 
         public static void Sqrshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+            }
         }
 
         public static void Sqrshrn_S(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+            }
         }
 
         public static void Sqrshrn_V(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+            }
         }
 
         public static void Sqrshrun_S(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+            }
         }
 
         public static void Sqrshrun_V(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+            }
         }
 
         public static void Sqshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+            }
         }
 
         public static void Sqshrn_S(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+            }
         }
 
         public static void Sqshrn_V(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+            }
         }
 
         public static void Sqshrun_S(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+            }
         }
 
         public static void Sqshrun_V(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+            }
         }
 
         public static void Sri_S(ArmEmitterContext context)
         {
-            EmitSri(context, scalar: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift);
+            }
+            else
+            {
+                EmitSri(context, scalar: true);
+            }
         }
 
         public static void Sri_V(ArmEmitterContext context)
         {
-            EmitSri(context, scalar: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift);
+            }
+            else
+            {
+                EmitSri(context, scalar: false);
+            }
         }
 
         public static void Srshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+            }
         }
 
         public static void Srshr_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+            }
         }
 
         public static void Srshr_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -325,14 +526,31 @@ namespace ARMeilleure.Instructions
 
         public static void Srsra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Srsra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -372,12 +590,26 @@ namespace ARMeilleure.Instructions
 
         public static void Sshl_S(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+            }
         }
 
         public static void Sshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Signed);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Signed);
+            }
         }
 
         public static void Sshll_V(ArmEmitterContext context)
@@ -386,7 +618,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShl(op);
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
+            }
+            else if (Optimizations.UseSse41)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -416,7 +652,18 @@ namespace ARMeilleure.Instructions
 
         public static void Sshr_S(ArmEmitterContext context)
         {
-            EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
+            }
+            else
+            {
+                EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+            }
         }
 
         public static void Sshr_V(ArmEmitterContext context)
@@ -425,7 +672,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShr(op);
 
-            if (Optimizations.UseGfni && op.Size == 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
+            }
+            else if (Optimizations.UseGfni && op.Size == 0)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -478,14 +729,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ssra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Ssra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
             {
                 int shift = GetImmShr(op);
 
@@ -515,49 +783,131 @@ namespace ARMeilleure.Instructions
 
         public static void Uqrshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+            }
         }
 
         public static void Uqrshrn_S(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            }
         }
 
         public static void Uqrshrn_V(ArmEmitterContext context)
         {
-            EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
+            }
+            else
+            {
+                EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            }
         }
 
         public static void Uqshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Saturating);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Saturating);
+            }
         }
 
         public static void Uqshrn_S(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+            }
         }
 
         public static void Uqshrn_V(ArmEmitterContext context)
         {
-            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
+            }
+            else
+            {
+                EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+            }
         }
 
         public static void Urshl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Round);
+            }
         }
 
         public static void Urshr_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+            }
         }
 
         public static void Urshr_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
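
The saturating narrows above (Uqrshrn, Uqshrn) and the saturating register shifts (Uqrshl, Uqshl) go through the ...Saturating... helpers because they can set the sticky FPSR.QC flag, which this backend lets the host hardware accumulate and only syncs back on demand (see the InstEmitSystem changes below). A reference model for one 32-to-16-bit Uqrshrn element, illustrative only:

    // Unsigned rounding shift right, narrowed to half width with
    // saturation; Qc reports whether the sticky flag should be set.
    static (ushort Result, bool Qc) UqrshrnElem(uint n, int shift)
    {
        // shift is 1..16 here, so the sum cannot overflow 64 bits.
        ulong rounded = ((ulong)n + (1UL << (shift - 1))) >> shift;
        bool sat = rounded > ushort.MaxValue;
        return (sat ? ushort.MaxValue : (ushort)rounded, sat);
    }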
@@ -593,14 +943,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ursra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Ursra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
                 int eSize = 8 << op.Size;
@@ -638,12 +1005,26 @@ namespace ARMeilleure.Instructions
 
         public static void Ushl_S(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.Scalar);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.Scalar);
+            }
         }
 
         public static void Ushl_V(ArmEmitterContext context)
         {
-            EmitShlRegOp(context, ShlRegFlags.None);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
+            }
+            else
+            {
+                EmitShlRegOp(context, ShlRegFlags.None);
+            }
         }
 
         public static void Ushll_V(ArmEmitterContext context)
@@ -652,7 +1033,11 @@ namespace ARMeilleure.Instructions
 
             int shift = GetImmShl(op);
 
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
+            }
+            else if (Optimizations.UseSse41)
             {
                 Operand n = GetVec(op.Rn);
 
@@ -682,14 +1067,31 @@ namespace ARMeilleure.Instructions
 
         public static void Ushr_S(ArmEmitterContext context)
         {
-            EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
+            }
+            else
+            {
+                EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+            }
         }
 
         public static void Ushr_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
 
@@ -714,14 +1116,31 @@ namespace ARMeilleure.Instructions
 
         public static void Usra_S(ArmEmitterContext context)
         {
-            EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
+            }
+            else
+            {
+                EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+            }
         }
 
         public static void Usra_V(ArmEmitterContext context)
         {
             OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
 
-            if (Optimizations.UseSse2 && op.Size > 0)
+            if (Optimizations.UseAdvSimd)
+            {
+                int shift = GetImmShr(op);
+
+                InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
+            }
+            else if (Optimizations.UseSse2 && op.Size > 0)
             {
                 int shift = GetImmShr(op);
 
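Usra and Ssra are the plain, non-rounding accumulate forms: a logical or arithmetic shift right added onto the destination. A model of one unsigned 64-bit element, which also shows the esize-wide edge case that a naive C# shift would mishandle:

    // d = d + (n >> shift), unsigned. shift is 1..64 architecturally,
    // and a shift of exactly 64 contributes zero; C# masks the shift
    // count to 0..63, so that case needs to be explicit.
    static ulong UsraElem(ulong d, ulong n, int shift)
    {
        ulong shifted = shift >= 64 ? 0UL : n >> shift;
        return d + shifted;
    }
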
diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs
index cc32228c..1345bbf1 100644
--- a/ARMeilleure/Instructions/InstEmitSystem.cs
+++ b/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -150,6 +150,8 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSystem op = (OpCodeSystem)context.CurrOp;
 
+            context.SyncQcFlag();
+
             Operand fpsr = Const(0);
 
             for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
@@ -196,6 +198,8 @@ namespace ARMeilleure.Instructions
         {
             OpCodeSystem op = (OpCodeSystem)context.CurrOp;
 
+            context.ClearQcFlagIfModified();
+
             Operand fpsr = GetIntOrZR(context, op.Rt);
                     fpsr = context.ConvertI64ToI32(fpsr);
 
-- 
cgit v1.2.3-70-g09d2