aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'ARMeilleure/CodeGen')
-rw-r--r--ARMeilleure/CodeGen/X86/Assembler.cs105
-rw-r--r--ARMeilleure/CodeGen/X86/AssemblerTable.cs2
-rw-r--r--ARMeilleure/CodeGen/X86/HardwareCapabilities.cs52
-rw-r--r--ARMeilleure/CodeGen/X86/IntrinsicTable.cs1
-rw-r--r--ARMeilleure/CodeGen/X86/X86Instruction.cs1
5 files changed, 158 insertions, 3 deletions
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
index 2ea4208b..67736a31 100644
--- a/ARMeilleure/CodeGen/X86/Assembler.cs
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -1034,7 +1034,13 @@ namespace ARMeilleure.CodeGen.X86
Debug.Assert(opCode != BadOp, "Invalid opcode value.");
- if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
+ if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
+ {
+ WriteEvexInst(dest, src1, src2, type, flags, opCode);
+
+ opCode &= 0xff;
+ }
+ else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
{
// In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits.
@@ -1153,6 +1159,103 @@ namespace ARMeilleure.CodeGen.X86
}
}
+ private void WriteEvexInst(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type,
+ InstructionFlags flags,
+ int opCode,
+ bool broadcast = false,
+ int registerWidth = 128,
+ int maskRegisterIdx = 0,
+ bool zeroElements = false)
+ {
+ int op1Idx = dest.GetRegister().Index;
+ int op2Idx = src1.GetRegister().Index;
+ int op3Idx = src2.GetRegister().Index;
+
+ WriteByte(0x62);
+
+ // P0
+ // Extend operand 1 register
+ bool r = (op1Idx & 8) == 0;
+ // Extend operand 3 register
+ bool x = (op3Idx & 16) == 0;
+ // Extend operand 3 register
+ bool b = (op3Idx & 8) == 0;
+ // Extend operand 1 register
+ bool rp = (op1Idx & 16) == 0;
+ // Escape code index
+ byte mm = 0b00;
+
+ switch ((ushort)(opCode >> 8))
+ {
+ case 0xf00: mm = 0b01; break;
+ case 0xf38: mm = 0b10; break;
+ case 0xf3a: mm = 0b11; break;
+
+ default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
+ }
+
+ WriteByte(
+ (byte)(
+ (r ? 0x80 : 0) |
+ (x ? 0x40 : 0) |
+ (b ? 0x20 : 0) |
+ (rp ? 0x10 : 0) |
+ mm));
+
+ // P1
+ // Specify 64-bit lane mode
+ bool w = Is64Bits(type);
+ // Operand 2 register index
+ byte vvvv = (byte)(~op2Idx & 0b1111);
+ // Opcode prefix
+ byte pp = (flags & InstructionFlags.PrefixMask) switch
+ {
+ InstructionFlags.Prefix66 => 0b01,
+ InstructionFlags.PrefixF3 => 0b10,
+ InstructionFlags.PrefixF2 => 0b11,
+ _ => 0
+ };
+ WriteByte(
+ (byte)(
+ (w ? 0x80 : 0) |
+ (vvvv << 3) |
+ 0b100 |
+ pp));
+
+ // P2
+ // Mask register determines what elements to zero, rather than what elements to merge
+ bool z = zeroElements;
+ // Specifies register-width
+ byte ll = 0b00;
+ switch (registerWidth)
+ {
+ case 128: ll = 0b00; break;
+ case 256: ll = 0b01; break;
+ case 512: ll = 0b10; break;
+
+ default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
+ }
+ // Embedded broadcast in the case of a memory operand
+ bool bcast = broadcast;
+ // Extend operand 2 register
+ bool vp = (op2Idx & 16) == 0;
+ // Mask register index
+ Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
+ byte aaa = (byte)(maskRegisterIdx & 0b111);
+
+ WriteByte(
+ (byte)(
+ (z ? 0x80 : 0) |
+ (ll << 5) |
+ (bcast ? 0x10 : 0) |
+ (vp ? 8 : 0) |
+ aaa));
+ }
+
private void WriteCompactInst(Operand operand, int opCode)
{
int regIndex = operand.GetRegister().Index;
diff --git a/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/ARMeilleure/CodeGen/X86/AssemblerTable.cs
index ecdc029f..b47b3ecd 100644
--- a/ARMeilleure/CodeGen/X86/AssemblerTable.cs
+++ b/ARMeilleure/CodeGen/X86/AssemblerTable.cs
@@ -20,6 +20,7 @@ namespace ARMeilleure.CodeGen.X86
Reg8Dest = 1 << 2,
RexW = 1 << 3,
Vex = 1 << 4,
+ Evex = 1 << 5,
PrefixBit = 16,
PrefixMask = 7 << PrefixBit,
@@ -278,6 +279,7 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66));
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
index c12a4e28..63a9e46a 100644
--- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -1,10 +1,14 @@
+using Ryujinx.Memory;
using System;
+using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
namespace ARMeilleure.CodeGen.X86
{
static class HardwareCapabilities
{
+ private delegate uint GetXcr0();
+
static HardwareCapabilities()
{
if (!X86Base.IsSupported)
@@ -24,6 +28,28 @@ namespace ARMeilleure.CodeGen.X86
FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
}
+
+ Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax();
+ }
+
+ private static uint GetXcr0Eax()
+ {
+ ReadOnlySpan<byte> asmGetXcr0 = new byte[]
+ {
+ 0x31, 0xc9, // xor ecx, ecx
+ 0xf, 0x01, 0xd0, // xgetbv
+ 0xc3, // ret
+ };
+
+ using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length);
+
+ memGetXcr0.Write(0, asmGetXcr0);
+
+ memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);
+
+ var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);
+
+ return fGetXcr0();
}
[Flags]
@@ -44,6 +70,7 @@ namespace ARMeilleure.CodeGen.X86
Sse42 = 1 << 20,
Popcnt = 1 << 23,
Aes = 1 << 25,
+ Osxsave = 1 << 27,
Avx = 1 << 28,
F16c = 1 << 29
}
@@ -52,7 +79,11 @@ namespace ARMeilleure.CodeGen.X86
public enum FeatureFlags7Ebx
{
Avx2 = 1 << 5,
- Sha = 1 << 29
+ Avx512f = 1 << 16,
+ Avx512dq = 1 << 17,
+ Sha = 1 << 29,
+ Avx512bw = 1 << 30,
+ Avx512vl = 1 << 31
}
[Flags]
@@ -61,10 +92,21 @@ namespace ARMeilleure.CodeGen.X86
Gfni = 1 << 8,
}
+ [Flags]
+ public enum Xcr0FlagsEax
+ {
+ Sse = 1 << 1,
+ YmmHi128 = 1 << 2,
+ Opmask = 1 << 5,
+ ZmmHi256 = 1 << 6,
+ Hi16Zmm = 1 << 7
+ }
+
public static FeatureFlags1Edx FeatureInfo1Edx { get; }
public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0;
+ public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0;
public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
@@ -76,8 +118,13 @@ namespace ARMeilleure.CodeGen.X86
public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
- public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx);
+ public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128);
public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
+ public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Osxsave)
+ && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm);
+ public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F;
+ public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F;
+ public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F;
public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni);
@@ -85,5 +132,6 @@ namespace ARMeilleure.CodeGen.X86
public static bool ForceLegacySse { get; set; }
public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
+ public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse;
}
} \ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index 8c909ac1..c788fa44 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -180,6 +180,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
}
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
index b024394e..ecfc432d 100644
--- a/ARMeilleure/CodeGen/X86/X86Instruction.cs
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -219,6 +219,7 @@ namespace ARMeilleure.CodeGen.X86
Vfnmsub231sd,
Vfnmsub231ss,
Vpblendvb,
+ Vpternlogd,
Xor,
Xorpd,
Xorps,