diff options
Diffstat (limited to 'Ryujinx.Graphics.OpenGL/Image')
-rw-r--r-- | Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs | 149 | ||||
-rw-r--r-- | Ryujinx.Graphics.OpenGL/Image/TextureCopy.cs | 6 | ||||
-rw-r--r-- | Ryujinx.Graphics.OpenGL/Image/TextureView.cs | 23 |
3 files changed, 172 insertions, 6 deletions
diff --git a/Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs b/Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs new file mode 100644 index 00000000..c4bbf745 --- /dev/null +++ b/Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs @@ -0,0 +1,149 @@
using System;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Ryujinx.Graphics.OpenGL.Image
{
    /// <summary>
    /// Byte-level conversions between the two packed 32-bit layouts named S8D24 and D24S8.
    /// Both conversions are a rotation of every 32-bit word by one byte, in opposite directions.
    /// </summary>
    static class FormatConverter
    {
        /// <summary>
        /// Returns a copy of <paramref name="data"/> in which every 32-bit word has been rotated
        /// left by 8 bits (on little-endian hardware: byte 3 of each word moves to byte 0 and
        /// bytes 0..2 move up by one position).
        /// </summary>
        /// <param name="data">Source bytes; read in groups of four</param>
        /// <returns>
        /// A new array with the same length as <paramref name="data"/>. Trailing bytes that do not
        /// form a complete 32-bit word are not copied and remain zero.
        /// </returns>
        public unsafe static byte[] ConvertS8D24ToD24S8(ReadOnlySpan<byte> data)
        {
            var laneMask = Vector128.Create(
                (byte)3, (byte)0, (byte)1, (byte)2,
                (byte)7, (byte)4, (byte)5, (byte)6,
                (byte)11, (byte)8, (byte)9, (byte)10,
                (byte)15, (byte)12, (byte)13, (byte)14);

            return RotateWords(data, laneMask, 8);
        }

        /// <summary>
        /// Returns a copy of <paramref name="data"/> in which every 32-bit word has been rotated
        /// right by 8 bits (on little-endian hardware: byte 0 of each word moves to byte 3 and
        /// bytes 1..3 move down by one position). This is the inverse of
        /// <see cref="ConvertS8D24ToD24S8"/>.
        /// </summary>
        /// <param name="data">Source bytes; read in groups of four</param>
        /// <returns>
        /// A new array with the same length as <paramref name="data"/>. Trailing bytes that do not
        /// form a complete 32-bit word are not copied and remain zero.
        /// </returns>
        public unsafe static byte[] ConvertD24S8ToS8D24(ReadOnlySpan<byte> data)
        {
            var laneMask = Vector128.Create(
                (byte)1, (byte)2, (byte)3, (byte)0,
                (byte)5, (byte)6, (byte)7, (byte)4,
                (byte)9, (byte)10, (byte)11, (byte)8,
                (byte)13, (byte)14, (byte)15, (byte)12);

            // Rotating right by 8 bits is the same as rotating left by 24 bits.
            return RotateWords(data, laneMask, 24);
        }

        /// <summary>
        /// Shared implementation of both conversions: applies a per-word byte rotation to
        /// <paramref name="data"/>, using AVX2 or SSSE3 byte shuffles for the bulk of the buffer
        /// when available and a scalar rotate for whatever is left.
        /// </summary>
        /// <param name="data">Source bytes</param>
        /// <param name="laneMask">Byte shuffle indices (0..15) describing the rotation for one 16-byte lane</param>
        /// <param name="rotateLeftBits">Left-rotation amount, in bits, that the scalar path applies to each word; must describe the same permutation as <paramref name="laneMask"/></param>
        /// <returns>A new array with the same length as <paramref name="data"/></returns>
        private unsafe static byte[] RotateWords(ReadOnlySpan<byte> data, Vector128<byte> laneMask, int rotateLeftBits)
        {
            byte[] output = new byte[data.Length];

            // Number of leading bytes already handled by a vectorized path.
            int start = 0;

            if (Avx2.IsSupported)
            {
                // The 256-bit byte shuffle works independently on each 128-bit lane and only
                // looks at the low 4 bits of every index, so repeating the lane mask is enough.
                var mask = Vector256.Create(laneMask, laneMask);

                int sizeAligned = data.Length & ~31;

                fixed (byte* pInput = data, pOutput = output)
                {
                    for (int i = 0; i < sizeAligned; i += 32)
                    {
                        var dataVec = Avx.LoadVector256(pInput + i);

                        dataVec = Avx2.Shuffle(dataVec, mask);

                        Avx.Store(pOutput + i, dataVec);
                    }
                }

                start = sizeAligned;
            }
            else if (Ssse3.IsSupported)
            {
                int sizeAligned = data.Length & ~15;

                fixed (byte* pInput = data, pOutput = output)
                {
                    for (int i = 0; i < sizeAligned; i += 16)
                    {
                        var dataVec = Sse2.LoadVector128(pInput + i);

                        dataVec = Ssse3.Shuffle(dataVec, laneMask);

                        Sse2.Store(pOutput + i, dataVec);
                    }
                }

                start = sizeAligned;
            }

            // Scalar path: the whole buffer when no SIMD path ran, otherwise only the tail
            // that did not fill a complete vector.
            var outSpan = MemoryMarshal.Cast<byte, uint>(output);
            var dataSpan = MemoryMarshal.Cast<byte, uint>(data);

            for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
            {
                outSpan[i] = BitOperations.RotateLeft(dataSpan[i], rotateLeftBits);
            }

            return output;
        }
    }
}
diff --git a/Ryujinx.Graphics.OpenGL/Image/TextureCopy.cs b/Ryujinx.Graphics.OpenGL/Image/TextureCopy.cs index 7811d021..9be86561 100644 --- a/Ryujinx.Graphics.OpenGL/Image/TextureCopy.cs +++ b/Ryujinx.Graphics.OpenGL/Image/TextureCopy.cs @@ 
-291,7 +291,7 @@ namespace Ryujinx.Graphics.OpenGL.Image private static ClearBufferMask GetMask(Format format) { - if (format == Format.D24UnormS8Uint || format == Format.D32FloatS8Uint) + if (format == Format.D24UnormS8Uint || format == Format.D32FloatS8Uint || format == Format.S8UintD24Unorm) { return ClearBufferMask.DepthBufferBit | ClearBufferMask.StencilBufferBit; } @@ -311,9 +311,7 @@ namespace Ryujinx.Graphics.OpenGL.Image private static bool IsDepthOnly(Format format) { - return format == Format.D16Unorm || - format == Format.D24X8Unorm || - format == Format.D32Float; + return format == Format.D16Unorm || format == Format.D32Float; } public TextureView BgraSwap(TextureView from) diff --git a/Ryujinx.Graphics.OpenGL/Image/TextureView.cs b/Ryujinx.Graphics.OpenGL/Image/TextureView.cs index f03653c4..909a0620 100644 --- a/Ryujinx.Graphics.OpenGL/Image/TextureView.cs +++ b/Ryujinx.Graphics.OpenGL/Image/TextureView.cs @@ -140,9 +140,11 @@ namespace Ryujinx.Graphics.OpenGL.Image size += Info.GetMipSize(level); } + ReadOnlySpan<byte> data; + if (HwCapabilities.UsePersistentBufferForFlush) { - return _renderer.PersistentBuffers.Default.GetTextureData(this, size); + data = _renderer.PersistentBuffers.Default.GetTextureData(this, size); } else { @@ -150,8 +152,15 @@ namespace Ryujinx.Graphics.OpenGL.Image WriteTo(target); - return new ReadOnlySpan<byte>(target.ToPointer(), size); + data = new ReadOnlySpan<byte>(target.ToPointer(), size); + } + + if (Format == Format.S8UintD24Unorm) + { + data = FormatConverter.ConvertD24S8ToS8D24(data); } + + return data; } public unsafe ReadOnlySpan<byte> GetData(int layer, int level) @@ -285,6 +294,11 @@ namespace Ryujinx.Graphics.OpenGL.Image public void SetData(ReadOnlySpan<byte> data) { + if (Format == Format.S8UintD24Unorm) + { + data = FormatConverter.ConvertS8D24ToD24S8(data); + } + unsafe { fixed (byte* ptr = data) @@ -296,6 +310,11 @@ namespace Ryujinx.Graphics.OpenGL.Image public void SetData(ReadOnlySpan<byte> data, int 
layer, int level) { + if (Format == Format.S8UintD24Unorm) + { + data = FormatConverter.ConvertS8D24ToD24S8(data); + } + unsafe { fixed (byte* ptr = data) |