diff options
Diffstat (limited to 'Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs')
-rw-r--r-- | Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs | 149 |
1 files changed, 149 insertions, 0 deletions
// Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs
using System;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Ryujinx.Graphics.OpenGL.Image
{
    /// <summary>
    /// Converts packed 32-bit texel data between two byte orders by rotating
    /// every 4-byte word by one byte.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the method names imply these are the S8D24 and D24S8
    /// depth-stencil layouts; which byte carries the stencil value is not
    /// visible in this file - confirm against the callers.
    /// </remarks>
    static class FormatConverter
    {
        /// <summary>
        /// Rotates every 4-byte word of <paramref name="data"/> so that input bytes
        /// <c>[b0, b1, b2, b3]</c> become <c>[b3, b0, b1, b2]</c>.
        /// </summary>
        /// <param name="data">Source bytes; read only, never modified.</param>
        /// <returns>
        /// A newly allocated array with the same length as <paramref name="data"/>.
        /// If the length is not a multiple of 4, the trailing 1-3 bytes are left as zero.
        /// </returns>
        public static byte[] ConvertS8D24ToD24S8(ReadOnlySpan<byte> data)
        {
            var laneShuffle = Vector128.Create(
                (byte)3, (byte)0, (byte)1, (byte)2,
                (byte)7, (byte)4, (byte)5, (byte)6,
                (byte)11, (byte)8, (byte)9, (byte)10,
                (byte)15, (byte)12, (byte)13, (byte)14);

            return RotateWords(data, laneShuffle, 8);
        }

        /// <summary>
        /// Rotates every 4-byte word of <paramref name="data"/> so that input bytes
        /// <c>[b0, b1, b2, b3]</c> become <c>[b1, b2, b3, b0]</c>; this is the inverse of
        /// <see cref="ConvertS8D24ToD24S8"/>.
        /// </summary>
        /// <param name="data">Source bytes; read only, never modified.</param>
        /// <returns>
        /// A newly allocated array with the same length as <paramref name="data"/>.
        /// If the length is not a multiple of 4, the trailing 1-3 bytes are left as zero.
        /// </returns>
        public static byte[] ConvertD24S8ToS8D24(ReadOnlySpan<byte> data)
        {
            var laneShuffle = Vector128.Create(
                (byte)1, (byte)2, (byte)3, (byte)0,
                (byte)5, (byte)6, (byte)7, (byte)4,
                (byte)9, (byte)10, (byte)11, (byte)8,
                (byte)13, (byte)14, (byte)15, (byte)12);

            // Rotating right by 8 bits is the same as rotating left by 24.
            return RotateWords(data, laneShuffle, 24);
        }

        /// <summary>
        /// Shared implementation: applies the same per-word byte rotation to the whole buffer,
        /// using AVX2 or SSSE3 byte shuffles for the bulk and a scalar loop for the remainder.
        /// </summary>
        /// <param name="data">Source bytes.</param>
        /// <param name="laneShuffle">
        /// Byte shuffle control for one 16-byte lane; must describe the same permutation
        /// as <paramref name="rotateLeftBits"/> does for a little-endian 32-bit word.
        /// </param>
        /// <param name="rotateLeftBits">Left rotation, in bits, used by the scalar path.</param>
        /// <returns>A new array of <c>data.Length</c> bytes holding the converted words.</returns>
        private static unsafe byte[] RotateWords(ReadOnlySpan<byte> data, Vector128<byte> laneShuffle, int rotateLeftBits)
        {
            byte[] output = new byte[data.Length];

            // Number of leading bytes already converted by a vector path.
            int start = 0;

            if (Avx2.IsSupported)
            {
                // The 256-bit byte shuffle works independently on each 128-bit lane and only
                // looks at the low 4 bits of each index, so repeating the 16-byte control in
                // both halves selects the same bytes as a control numbered 0-31.
                var mask = Vector256.Create(laneShuffle, laneShuffle);

                int sizeAligned = data.Length & ~31;

                fixed (byte* pInput = data, pOutput = output)
                {
                    for (int i = 0; i < sizeAligned; i += 32)
                    {
                        var dataVec = Avx.LoadVector256(pInput + i);

                        dataVec = Avx2.Shuffle(dataVec, mask);

                        Avx.Store(pOutput + i, dataVec);
                    }
                }

                start = sizeAligned;
            }
            else if (Ssse3.IsSupported)
            {
                int sizeAligned = data.Length & ~15;

                fixed (byte* pInput = data, pOutput = output)
                {
                    for (int i = 0; i < sizeAligned; i += 16)
                    {
                        var dataVec = Sse2.LoadVector128(pInput + i);

                        dataVec = Ssse3.Shuffle(dataVec, laneShuffle);

                        Sse2.Store(pOutput + i, dataVec);
                    }
                }

                start = sizeAligned;
            }

            // Scalar path: handles whatever the vector loop did not cover (or everything when
            // no vector path is available). The cast drops any bytes that do not form a whole
            // 32-bit word, which is why such trailing bytes stay zero in the output.
            var outSpan = MemoryMarshal.Cast<byte, uint>(output);
            var dataSpan = MemoryMarshal.Cast<byte, uint>(data);

            for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
            {
                outSpan[i] = BitOperations.RotateLeft(dataSpan[i], rotateLeftBits);
            }

            return output;
        }
    }
}