about | summary | refs | log | tree | commit | diff
path: root/Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs
diff options
context:
space:
mode:
Diffstat (limited to 'Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs')
-rw-r--r-- Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs 149
1 files changed, 149 insertions, 0 deletions
diff --git a/Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs b/Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs
new file mode 100644
index 00000000..c4bbf745
--- /dev/null
+++ b/Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs
@@ -0,0 +1,149 @@
+using System;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace Ryujinx.Graphics.OpenGL.Image
+{
+ /// <summary>
+ /// Byte-order converters between the layouts named S8D24 and D24S8.
+ /// Both directions rotate every 4-byte group of the input by one byte position;
+ /// they only differ in the direction of that rotation.
+ /// </summary>
+ static class FormatConverter
+ {
+     /// <summary>
+     /// Rotates every 4-byte group of <paramref name="data"/> so that the byte at
+     /// offset 3 moves to offset 0 and the other three bytes each move up by one.
+     /// </summary>
+     /// <param name="data">Input bytes. The input is only read, never modified.</param>
+     /// <returns>
+     /// A newly allocated array with the same length as <paramref name="data"/>.
+     /// Bytes beyond the last complete 4-byte group are left as zero.
+     /// </returns>
+     public static byte[] ConvertS8D24ToD24S8(ReadOnlySpan<byte> data)
+     {
+         return RotateWords(data, rotateLeft: true);
+     }
+
+     /// <summary>
+     /// Rotates every 4-byte group of <paramref name="data"/> so that the byte at
+     /// offset 0 moves to offset 3 and the other three bytes each move down by one.
+     /// This is the inverse of <see cref="ConvertS8D24ToD24S8"/>.
+     /// </summary>
+     /// <param name="data">Input bytes. The input is only read, never modified.</param>
+     /// <returns>
+     /// A newly allocated array with the same length as <paramref name="data"/>.
+     /// Bytes beyond the last complete 4-byte group are left as zero.
+     /// </returns>
+     public static byte[] ConvertD24S8ToS8D24(ReadOnlySpan<byte> data)
+     {
+         return RotateWords(data, rotateLeft: false);
+     }
+
+     /// <summary>
+     /// Shared implementation of both conversions: rotates each 32-bit word of the
+     /// input by 8 bits, using AVX2 or SSSE3 byte shuffles when available and a
+     /// scalar rotate for whatever the vector loop did not cover.
+     /// </summary>
+     /// <param name="data">Input bytes.</param>
+     /// <param name="rotateLeft">
+     /// True to rotate each word left by 8 bits, false to rotate it right by 8 bits.
+     /// </param>
+     /// <returns>A new array of <c>data.Length</c> bytes holding the rotated words.</returns>
+     private static unsafe byte[] RotateWords(ReadOnlySpan<byte> data, bool rotateLeft)
+     {
+         byte[] output = new byte[data.Length];
+
+         // Byte offset of the first byte that the vector loops did not process.
+         int start = 0;
+
+         if (Avx2.IsSupported)
+         {
+             // The 256-bit byte shuffle works on each 128-bit lane independently and
+             // only uses the low 4 bits of every index, so the same 16-entry mask
+             // can be used for both lanes.
+             Vector128<byte> laneMask = CreateLaneMask(rotateLeft);
+             Vector256<byte> mask = Vector256.Create(laneMask, laneMask);
+
+             int sizeAligned = data.Length & ~31;
+
+             fixed (byte* pInput = data, pOutput = output)
+             {
+                 for (uint i = 0; i < sizeAligned; i += 32)
+                 {
+                     Avx.Store(pOutput + i, Avx2.Shuffle(Avx.LoadVector256(pInput + i), mask));
+                 }
+             }
+
+             start = sizeAligned;
+         }
+         else if (Ssse3.IsSupported)
+         {
+             Vector128<byte> mask = CreateLaneMask(rotateLeft);
+
+             int sizeAligned = data.Length & ~15;
+
+             fixed (byte* pInput = data, pOutput = output)
+             {
+                 for (uint i = 0; i < sizeAligned; i += 16)
+                 {
+                     Sse2.Store(pOutput + i, Ssse3.Shuffle(Sse2.LoadVector128(pInput + i), mask));
+                 }
+             }
+
+             start = sizeAligned;
+         }
+
+         // Scalar path: handles the remaining whole words (or everything when no
+         // vector path ran). MemoryMarshal.Cast drops a trailing partial word, so
+         // up to three bytes at the end keep their zero-initialized value.
+         ReadOnlySpan<uint> dataSpan = MemoryMarshal.Cast<byte, uint>(data);
+         Span<uint> outSpan = MemoryMarshal.Cast<byte, uint>(output);
+
+         // Rotating right by 8 bits is the same as rotating left by 24 bits.
+         int rotation = rotateLeft ? 8 : 24;
+
+         for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
+         {
+             outSpan[i] = BitOperations.RotateLeft(dataSpan[i], rotation);
+         }
+
+         return output;
+     }
+
+     /// <summary>
+     /// Builds the 16-byte shuffle mask that rotates each of the four 4-byte groups
+     /// of a 128-bit lane by one byte position.
+     /// </summary>
+     /// <param name="rotateLeft">
+     /// True for the mask matching a left word rotation (output byte 0 takes input byte 3),
+     /// false for the mask matching a right word rotation (output byte 0 takes input byte 1).
+     /// </param>
+     /// <returns>The shuffle control mask.</returns>
+     private static Vector128<byte> CreateLaneMask(bool rotateLeft)
+     {
+         if (rotateLeft)
+         {
+             return Vector128.Create(
+                 (byte)3, (byte)0, (byte)1, (byte)2,
+                 (byte)7, (byte)4, (byte)5, (byte)6,
+                 (byte)11, (byte)8, (byte)9, (byte)10,
+                 (byte)15, (byte)12, (byte)13, (byte)14);
+         }
+
+         return Vector128.Create(
+             (byte)1, (byte)2, (byte)3, (byte)0,
+             (byte)5, (byte)6, (byte)7, (byte)4,
+             (byte)9, (byte)10, (byte)11, (byte)8,
+             (byte)13, (byte)14, (byte)15, (byte)12);
+     }
+ }
+}