diff options
author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
---|---|---|
committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs | |
parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) |
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs')
-rw-r--r-- | src/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs | 495 |
1 files changed, 495 insertions, 0 deletions
using Ryujinx.Common.Logging;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Texture;
using Ryujinx.Graphics.Vic.Types;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;

namespace Ryujinx.Graphics.Vic.Image
{
    /// <summary>
    /// Reads VIC input slot surfaces from GPU memory and converts them into <see cref="Surface"/> instances.
    /// </summary>
    static class SurfaceReader
    {
        /// <summary>
        /// Reads the surface described by a slot, dispatching on the slot pixel format.
        /// </summary>
        /// <param name="rm">Resource manager providing the memory manager and the surface/buffer pools</param>
        /// <param name="config">Slot configuration</param>
        /// <param name="surfaceConfig">Slot surface configuration (pixel format, dimensions, block layout)</param>
        /// <param name="offsets">Plane offsets of the current, previous and next fields</param>
        /// <returns>
        /// The converted surface. For an unsupported pixel format an error is logged and a new surface
        /// of the luma dimensions is returned without any pixel data being written to it here.
        /// </returns>
        public static Surface Read(
            ResourceManager rm,
            ref SlotConfig config,
            ref SlotSurfaceConfig surfaceConfig,
            ref Array8<PlaneOffsets> offsets)
        {
            switch (surfaceConfig.SlotPixelFormat)
            {
                case PixelFormat.Y8___V8U8_N420: return ReadNv12(rm, ref config, ref surfaceConfig, ref offsets);
            }

            Logger.Error?.Print(LogClass.Vic, $"Unsupported pixel format \"{surfaceConfig.SlotPixelFormat}\".");

            // The width/height fields are stored minus one.
            int lw = surfaceConfig.SlotLumaWidth + 1;
            int lh = surfaceConfig.SlotLumaHeight + 1;

            return new Surface(rm.SurfacePool, lw, lh);
        }

        /// <summary>
        /// Reads a two-plane surface (8-bit luma plane plus a plane of interleaved 2-byte chroma pairs
        /// subsampled by two in both directions) and expands it to one <see cref="Pixel"/> per luma sample.
        /// </summary>
        /// <remarks>
        /// The luma sample is written to R, the first and second byte of the chroma pair to G and B.
        /// Three equivalent implementations are provided: SSE4.1, AdvSimd (Arm64) and a scalar fallback.
        /// </remarks>
        /// <param name="rm">Resource manager providing the memory manager and the surface/buffer pools</param>
        /// <param name="config">Slot configuration</param>
        /// <param name="surfaceConfig">Slot surface configuration</param>
        /// <param name="offsets">Plane offsets of the current, previous and next fields</param>
        /// <returns>The converted surface</returns>
        private static unsafe Surface ReadNv12(
            ResourceManager rm,
            ref SlotConfig config,
            ref SlotSurfaceConfig surfaceConfig,
            ref Array8<PlaneOffsets> offsets)
        {
            InputSurface input = ReadSurface(rm, ref config, ref surfaceConfig, ref offsets, 1, 2);

            int width = input.Width;
            int height = input.Height;

            int yStride = GetPitch(width, 1);
            int uvStride = GetPitch(input.UvWidth, 2);

            Surface output = new Surface(rm.SurfacePool, width, height);

            if (Sse41.IsSupported)
            {
                // Reorders the bytes of every 32-bit lane from (Y, 0, C0, C1) to (Y, C0, C1, 0).
                Vector128<byte> shufMask = Vector128.Create(
                    (byte)0, (byte)2, (byte)3, (byte)1,
                    (byte)4, (byte)6, (byte)7, (byte)5,
                    (byte)8, (byte)10, (byte)11, (byte)9,
                    (byte)12, (byte)14, (byte)15, (byte)13);

                // Sets the top byte of every 32-bit lane (the alpha position) to 0xff.
                // NOTE(review): after the << 2 below the vector path stores alpha 0x3fc while the
                // scalar paths store 0x3ff - confirm whether the difference matters to consumers.
                Vector128<short> alphaMask = Vector128.Create(0xff << 24).AsInt16();

                int yStrideGap = yStride - width;

                // Number of pixels per row that can be handled 16 at a time.
                int widthTrunc = width & ~0xf;

                fixed (Pixel* dstPtr = output.Data)
                {
                    Pixel* op = dstPtr;

                    fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
                    {
                        byte* i0p = src0Ptr;

                        for (int y = 0; y < height; y++)
                        {
                            // One chroma row is shared by two luma rows.
                            byte* i1p = src1Ptr + (y >> 1) * uvStride;

                            int x = 0;

                            for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
                            {
                                // 16 luma samples, zero extended to 16 bits.
                                Vector128<short> ya0 = Sse41.ConvertToVector128Int16(i0p);
                                Vector128<short> ya1 = Sse41.ConvertToVector128Int16(i0p + 8);

                                // 8 chroma pairs.
                                Vector128<byte> uv = Sse2.LoadVector128(i1p);

                                // Duplicate every chroma pair so that two adjacent pixels share it.
                                Vector128<short> uv0 = Sse2.UnpackLow(uv.AsInt16(), uv.AsInt16());
                                Vector128<short> uv1 = Sse2.UnpackHigh(uv.AsInt16(), uv.AsInt16());

                                // Interleave luma and chroma: 4 bytes per pixel, 4 pixels per vector.
                                Vector128<short> rgba0 = Sse2.UnpackLow(ya0, uv0);
                                Vector128<short> rgba1 = Sse2.UnpackHigh(ya0, uv0);
                                Vector128<short> rgba2 = Sse2.UnpackLow(ya1, uv1);
                                Vector128<short> rgba3 = Sse2.UnpackHigh(ya1, uv1);

                                rgba0 = Ssse3.Shuffle(rgba0.AsByte(), shufMask).AsInt16();
                                rgba1 = Ssse3.Shuffle(rgba1.AsByte(), shufMask).AsInt16();
                                rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16();
                                rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16();

                                rgba0 = Sse2.Or(rgba0, alphaMask);
                                rgba1 = Sse2.Or(rgba1, alphaMask);
                                rgba2 = Sse2.Or(rgba2, alphaMask);
                                rgba3 = Sse2.Or(rgba3, alphaMask);

                                // Widen every component to 16 bits: 2 pixels per vector.
                                Vector128<short> rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte());
                                Vector128<short> rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte()));
                                Vector128<short> rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte());
                                Vector128<short> rgba16_3 = Sse41.ConvertToVector128Int16(HighToLow(rgba1.AsByte()));
                                Vector128<short> rgba16_4 = Sse41.ConvertToVector128Int16(rgba2.AsByte());
                                Vector128<short> rgba16_5 = Sse41.ConvertToVector128Int16(HighToLow(rgba2.AsByte()));
                                Vector128<short> rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte());
                                Vector128<short> rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte()));

                                // Scale the 8-bit components by 4.
                                rgba16_0 = Sse2.ShiftLeftLogical(rgba16_0, 2);
                                rgba16_1 = Sse2.ShiftLeftLogical(rgba16_1, 2);
                                rgba16_2 = Sse2.ShiftLeftLogical(rgba16_2, 2);
                                rgba16_3 = Sse2.ShiftLeftLogical(rgba16_3, 2);
                                rgba16_4 = Sse2.ShiftLeftLogical(rgba16_4, 2);
                                rgba16_5 = Sse2.ShiftLeftLogical(rgba16_5, 2);
                                rgba16_6 = Sse2.ShiftLeftLogical(rgba16_6, 2);
                                rgba16_7 = Sse2.ShiftLeftLogical(rgba16_7, 2);

                                // Each 128-bit store writes two consecutive pixels.
                                Sse2.Store((short*)(op + (uint)x + 0), rgba16_0);
                                Sse2.Store((short*)(op + (uint)x + 2), rgba16_1);
                                Sse2.Store((short*)(op + (uint)x + 4), rgba16_2);
                                Sse2.Store((short*)(op + (uint)x + 6), rgba16_3);
                                Sse2.Store((short*)(op + (uint)x + 8), rgba16_4);
                                Sse2.Store((short*)(op + (uint)x + 10), rgba16_5);
                                Sse2.Store((short*)(op + (uint)x + 12), rgba16_6);
                                Sse2.Store((short*)(op + (uint)x + 14), rgba16_7);
                            }

                            // Scalar tail for the remaining (width % 16) pixels.
                            // x is even on entry, and pixels 2n and 2n + 1 share one chroma pair, so the
                            // chroma pointer must only advance once x (already incremented) is even again.
                            for (; x < width; x++, i1p += (~x & 1) * 2)
                            {
                                Pixel* px = op + (uint)x;

                                px->R = Upsample(*i0p++);
                                px->G = Upsample(*i1p);
                                px->B = Upsample(*(i1p + 1));
                                px->A = 0x3ff;
                            }

                            op += width;
                            i0p += yStrideGap; // Skip the luma row padding.
                        }
                    }
                }
            }
            else if (AdvSimd.Arm64.IsSupported)
            {
                // Added to every 32-bit lane to set its top byte (the alpha position) to 0xff.
                // NOTE(review): after the << 2 below the vector path stores alpha 0x3fc while the
                // scalar paths store 0x3ff - confirm whether the difference matters to consumers.
                Vector128<int> alphaMask = Vector128.Create(0xffu << 24).AsInt32();

                int yStrideGap = yStride - width;

                // Number of pixels per row that can be handled 16 at a time.
                int widthTrunc = width & ~0xf;

                fixed (Pixel* dstPtr = output.Data)
                {
                    Pixel* op = dstPtr;

                    fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
                    {
                        byte* i0p = src0Ptr;

                        for (int y = 0; y < height; y++)
                        {
                            // One chroma row is shared by two luma rows.
                            byte* i1p = src1Ptr + (y >> 1) * uvStride;

                            int x = 0;

                            for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
                            {
                                Vector128<byte> ya = AdvSimd.LoadVector128(i0p);
                                Vector128<byte> uv = AdvSimd.LoadVector128(i1p);

                                // 16 luma samples, zero extended to 16 bits.
                                Vector128<short> ya0 = AdvSimd.ZeroExtendWideningLower(ya.GetLower()).AsInt16();
                                Vector128<short> ya1 = AdvSimd.ZeroExtendWideningUpper(ya).AsInt16();

                                // Duplicate every chroma pair so that two adjacent pixels share it.
                                Vector128<short> uv0 = AdvSimd.Arm64.ZipLow(uv.AsInt16(), uv.AsInt16());
                                Vector128<short> uv1 = AdvSimd.Arm64.ZipHigh(uv.AsInt16(), uv.AsInt16());

                                // Move luma to the high byte so each pixel becomes (0, Y, C0, C1) once zipped.
                                ya0 = AdvSimd.ShiftLeftLogical(ya0, 8);
                                ya1 = AdvSimd.ShiftLeftLogical(ya1, 8);

                                Vector128<short> rgba0 = AdvSimd.Arm64.ZipLow(ya0, uv0);
                                Vector128<short> rgba1 = AdvSimd.Arm64.ZipHigh(ya0, uv0);
                                Vector128<short> rgba2 = AdvSimd.Arm64.ZipLow(ya1, uv1);
                                Vector128<short> rgba3 = AdvSimd.Arm64.ZipHigh(ya1, uv1);

                                // Shift every pixel right by one byte to (Y, C0, C1, 0) and add the alpha byte.
                                rgba0 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba0.AsInt32(), 8).AsInt16();
                                rgba1 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba1.AsInt32(), 8).AsInt16();
                                rgba2 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba2.AsInt32(), 8).AsInt16();
                                rgba3 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba3.AsInt32(), 8).AsInt16();

                                // Widen every component to 16 bits: 2 pixels per vector.
                                Vector128<short> rgba16_0 = AdvSimd.ZeroExtendWideningLower(rgba0.AsByte().GetLower()).AsInt16();
                                Vector128<short> rgba16_1 = AdvSimd.ZeroExtendWideningUpper(rgba0.AsByte()).AsInt16();
                                Vector128<short> rgba16_2 = AdvSimd.ZeroExtendWideningLower(rgba1.AsByte().GetLower()).AsInt16();
                                Vector128<short> rgba16_3 = AdvSimd.ZeroExtendWideningUpper(rgba1.AsByte()).AsInt16();
                                Vector128<short> rgba16_4 = AdvSimd.ZeroExtendWideningLower(rgba2.AsByte().GetLower()).AsInt16();
                                Vector128<short> rgba16_5 = AdvSimd.ZeroExtendWideningUpper(rgba2.AsByte()).AsInt16();
                                Vector128<short> rgba16_6 = AdvSimd.ZeroExtendWideningLower(rgba3.AsByte().GetLower()).AsInt16();
                                Vector128<short> rgba16_7 = AdvSimd.ZeroExtendWideningUpper(rgba3.AsByte()).AsInt16();

                                // Scale the 8-bit components by 4.
                                rgba16_0 = AdvSimd.ShiftLeftLogical(rgba16_0, 2);
                                rgba16_1 = AdvSimd.ShiftLeftLogical(rgba16_1, 2);
                                rgba16_2 = AdvSimd.ShiftLeftLogical(rgba16_2, 2);
                                rgba16_3 = AdvSimd.ShiftLeftLogical(rgba16_3, 2);
                                rgba16_4 = AdvSimd.ShiftLeftLogical(rgba16_4, 2);
                                rgba16_5 = AdvSimd.ShiftLeftLogical(rgba16_5, 2);
                                rgba16_6 = AdvSimd.ShiftLeftLogical(rgba16_6, 2);
                                rgba16_7 = AdvSimd.ShiftLeftLogical(rgba16_7, 2);

                                // Each 128-bit store writes two consecutive pixels.
                                AdvSimd.Store((short*)(op + (uint)x + 0), rgba16_0);
                                AdvSimd.Store((short*)(op + (uint)x + 2), rgba16_1);
                                AdvSimd.Store((short*)(op + (uint)x + 4), rgba16_2);
                                AdvSimd.Store((short*)(op + (uint)x + 6), rgba16_3);
                                AdvSimd.Store((short*)(op + (uint)x + 8), rgba16_4);
                                AdvSimd.Store((short*)(op + (uint)x + 10), rgba16_5);
                                AdvSimd.Store((short*)(op + (uint)x + 12), rgba16_6);
                                AdvSimd.Store((short*)(op + (uint)x + 14), rgba16_7);
                            }

                            // Scalar tail for the remaining (width % 16) pixels.
                            // x is even on entry, and pixels 2n and 2n + 1 share one chroma pair, so the
                            // chroma pointer must only advance once x (already incremented) is even again.
                            for (; x < width; x++, i1p += (~x & 1) * 2)
                            {
                                Pixel* px = op + (uint)x;

                                px->R = Upsample(*i0p++);
                                px->G = Upsample(*i1p);
                                px->B = Upsample(*(i1p + 1));
                                px->A = 0x3ff;
                            }

                            op += width;
                            i0p += yStrideGap; // Skip the luma row padding.
                        }
                    }
                }
            }
            else
            {
                for (int y = 0; y < height; y++)
                {
                    // One chroma row is shared by two luma rows.
                    int uvBase = (y >> 1) * uvStride;

                    for (int x = 0; x < width; x++)
                    {
                        output.SetR(x, y, Upsample(input.Buffer0[y * yStride + x]));

                        // Pixels 2n and 2n + 1 share the 2-byte chroma pair at byte offset 2n.
                        int uvOffs = uvBase + (x & ~1);

                        output.SetG(x, y, Upsample(input.Buffer1[uvOffs]));
                        output.SetB(x, y, Upsample(input.Buffer1[uvOffs + 1]));
                        output.SetA(x, y, 0x3ff);
                    }
                }
            }

            input.Return(rm.BufferPool);

            return output;
        }

        /// <summary>
        /// Moves the upper 64 bits of a vector into its lower 64 bits.
        /// </summary>
        /// <param name="value">Source vector</param>
        /// <returns>Vector whose lower and upper halves both hold the upper half of <paramref name="value"/></returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector128<byte> HighToLow(Vector128<byte> value)
        {
            return Sse.MoveHighToLow(value.AsSingle(), value.AsSingle()).AsByte();
        }

        /// <summary>
        /// Reads up to three planes of a slot surface into rented linear buffers.
        /// </summary>
        /// <param name="rm">Resource manager providing the memory manager and the buffer pool</param>
        /// <param name="config">Slot configuration</param>
        /// <param name="surfaceConfig">Slot surface configuration</param>
        /// <param name="offsets">Plane offsets of the current, previous and next fields</param>
        /// <param name="bytesPerPixel">Bytes per pixel of the first (luma) plane</param>
        /// <param name="planes">Number of planes to read</param>
        /// <returns>Input surface holding the dimensions and the buffers that were read</returns>
        private static InputSurface ReadSurface(
            ResourceManager rm,
            ref SlotConfig config,
            ref SlotSurfaceConfig surfaceConfig,
            ref Array8<PlaneOffsets> offsets,
            int bytesPerPixel,
            int planes)
        {
            InputSurface surface = new InputSurface();

            surface.Initialize();

            int gobBlocksInY = 1 << surfaceConfig.SlotBlkHeight;

            bool linear = surfaceConfig.SlotBlkKind == 0;

            // The width/height fields are stored minus one.
            int lw = surfaceConfig.SlotLumaWidth + 1;
            int lh = surfaceConfig.SlotLumaHeight + 1;

            int cw = surfaceConfig.SlotChromaWidth + 1;
            int ch = surfaceConfig.SlotChromaHeight + 1;

            // Interlaced inputs have double the height when deinterlaced.
            int heightShift = config.FrameFormat.IsField() ? 1 : 0;

            surface.Width = lw;
            surface.Height = lh << heightShift;
            surface.UvWidth = cw;
            surface.UvHeight = ch << heightShift;

            if (planes > 0)
            {
                surface.SetBuffer0(ReadBuffer(rm, ref config, ref offsets, linear, 0, lw, lh, bytesPerPixel, gobBlocksInY));
            }

            if (planes > 1)
            {
                // With exactly two planes the second one is read with 2 bytes per pixel,
                // otherwise it is read with 1 byte per pixel.
                surface.SetBuffer1(ReadBuffer(rm, ref config, ref offsets, linear, 1, cw, ch, planes == 2 ? 2 : 1, gobBlocksInY));
            }

            if (planes > 2)
            {
                surface.SetBuffer2(ReadBuffer(rm, ref config, ref offsets, linear, 2, cw, ch, 1, gobBlocksInY));
            }

            return surface;
        }

        /// <summary>
        /// Reads a single plane of the current field/frame and, for field or interlaced frame formats,
        /// applies the deinterlace mode selected in the slot configuration.
        /// </summary>
        /// <param name="rm">Resource manager providing the memory manager and the buffer pool</param>
        /// <param name="config">Slot configuration</param>
        /// <param name="offsets">Plane offsets; index 0 is the current, 1 the previous and 2 the next field</param>
        /// <param name="linear">True if the source is pitch linear, false if it is block linear</param>
        /// <param name="plane">Plane index (0 = luma, 1 = chroma U, 2 = chroma V)</param>
        /// <param name="width">Plane width in pixels</param>
        /// <param name="height">Plane height in rows, as stored in memory</param>
        /// <param name="bytesPerPixel">Bytes per pixel of the plane</param>
        /// <param name="gobBlocksInY">Block height in GOBs, used for block linear sources</param>
        /// <returns>Rented buffer with the plane data</returns>
        private static RentedBuffer ReadBuffer(
            ResourceManager rm,
            scoped ref SlotConfig config,
            scoped ref Array8<PlaneOffsets> offsets,
            bool linear,
            int plane,
            int width,
            int height,
            int bytesPerPixel,
            int gobBlocksInY)
        {
            FrameFormat frameFormat = config.FrameFormat;
            bool isLuma = plane == 0;
            bool isField = frameFormat.IsField();
            bool isTopField = frameFormat.IsTopField(isLuma);
            int stride = GetPitch(width, bytesPerPixel);
            uint offset = GetOffset(ref offsets[0], plane);

            int dstStart = 0;
            int dstStride = stride;

            if (isField)
            {
                // A field only fills every other row of the output: the top field starts
                // at row 0, the bottom field at row 1, and both skip one row after each line.
                dstStart = isTopField ? 0 : stride;
                dstStride = stride * 2;
            }

            RentedBuffer buffer;

            if (linear)
            {
                buffer = ReadBufferLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel);
            }
            else
            {
                buffer = ReadBufferBlockLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel, gobBlocksInY);
            }

            if (isField || frameFormat.IsInterlaced())
            {
                // Neighbouring fields are only read when enabled; otherwise they stay empty.
                RentedBuffer prevBuffer = RentedBuffer.Empty;
                RentedBuffer nextBuffer = RentedBuffer.Empty;

                if (config.PrevFieldEnable)
                {
                    prevBuffer = ReadBufferNoDeinterlace(rm, ref offsets[1], linear, plane, width, height, bytesPerPixel, gobBlocksInY);
                }

                if (config.NextFieldEnable)
                {
                    nextBuffer = ReadBufferNoDeinterlace(rm, ref offsets[2], linear, plane, width, height, bytesPerPixel, gobBlocksInY);
                }

                // Row width in bytes, without the pitch padding.
                int w = width * bytesPerPixel;

                switch (config.DeinterlaceMode)
                {
                    case DeinterlaceMode.Weave:
                        Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField);
                        break;
                    case DeinterlaceMode.BobField:
                        Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField);
                        break;
                    case DeinterlaceMode.Bob:
                        bool isCurrentTop = isLuma ? config.IsEven : config.ChromaEven;
                        Scaler.DeinterlaceBob(buffer.Data, w, stride, isCurrentTop ^ frameFormat.IsInterlacedBottomFirst());
                        break;
                    case DeinterlaceMode.NewBob:
                    case DeinterlaceMode.Disi1:
                        Scaler.DeinterlaceMotionAdaptive(buffer.Data, prevBuffer.Data, nextBuffer.Data, w, stride, isTopField);
                        break;
                    case DeinterlaceMode.WeaveLumaBobFieldChroma:
                        if (isLuma)
                        {
                            Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField);
                        }
                        else
                        {
                            Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField);
                        }
                        break;
                    default:
                        Logger.Error?.Print(LogClass.Vic, $"Unsupported deinterlace mode \"{config.DeinterlaceMode}\".");
                        break;
                }

                prevBuffer.Return(rm.BufferPool);
                nextBuffer.Return(rm.BufferPool);
            }

            return buffer;
        }

        /// <summary>
        /// Gets the offset of a plane from a set of plane offsets.
        /// </summary>
        /// <param name="offsets">Plane offsets</param>
        /// <param name="plane">Plane index (0 = luma, 1 = chroma U, 2 = chroma V)</param>
        /// <returns>Offset of the requested plane</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="plane"/> is not 0, 1 or 2</exception>
        private static uint GetOffset(ref PlaneOffsets offsets, int plane)
        {
            return plane switch
            {
                0 => offsets.LumaOffset,
                1 => offsets.ChromaUOffset,
                2 => offsets.ChromaVOffset,
                _ => throw new ArgumentOutOfRangeException(nameof(plane))
            };
        }

        /// <summary>
        /// Reads a single plane into a tightly pitched rented buffer, without any field interleaving
        /// or deinterlacing.
        /// </summary>
        /// <param name="rm">Resource manager providing the memory manager and the buffer pool</param>
        /// <param name="offsets">Plane offsets of the field to read</param>
        /// <param name="linear">True if the source is pitch linear, false if it is block linear</param>
        /// <param name="plane">Plane index (0 = luma, 1 = chroma U, 2 = chroma V)</param>
        /// <param name="width">Plane width in pixels</param>
        /// <param name="height">Plane height in rows</param>
        /// <param name="bytesPerPixel">Bytes per pixel of the plane</param>
        /// <param name="gobBlocksInY">Block height in GOBs, used for block linear sources</param>
        /// <returns>Rented buffer with the plane data</returns>
        private static RentedBuffer ReadBufferNoDeinterlace(
            ResourceManager rm,
            ref PlaneOffsets offsets,
            bool linear,
            int plane,
            int width,
            int height,
            int bytesPerPixel,
            int gobBlocksInY)
        {
            int stride = GetPitch(width, bytesPerPixel);
            uint offset = GetOffset(ref offsets, plane);

            if (linear)
            {
                return ReadBufferLinear(rm, offset, width, height, 0, stride, bytesPerPixel);
            }

            return ReadBufferBlockLinear(rm, offset, width, height, 0, stride, bytesPerPixel, gobBlocksInY);
        }

        /// <summary>
        /// Copies a pitch linear plane from GPU memory into a rented buffer, row by row.
        /// </summary>
        /// <param name="rm">Resource manager providing the memory manager and the buffer pool</param>
        /// <param name="offset">Plane offset, extended to an address with ExtendOffset</param>
        /// <param name="width">Plane width in pixels</param>
        /// <param name="height">Plane height in rows</param>
        /// <param name="dstStart">Byte offset of the first row inside the destination buffer</param>
        /// <param name="dstStride">Distance in bytes between two destination rows</param>
        /// <param name="bytesPerPixel">Bytes per pixel of the plane</param>
        /// <returns>Rented buffer of <c>dstStride * height</c> bytes with the plane data</returns>
        private static RentedBuffer ReadBufferLinear(
            ResourceManager rm,
            uint offset,
            int width,
            int height,
            int dstStart,
            int dstStride,
            int bytesPerPixel)
        {
            int srcStride = GetPitch(width, bytesPerPixel);
            int inSize = srcStride * height;

            ReadOnlySpan<byte> src = rm.Gmm.GetSpan(ExtendOffset(offset), inSize);

            int outSize = dstStride * height;
            int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer);

            // The rented array may be larger than requested; only expose the part in use.
            Span<byte> dst = buffer;
            dst = dst.Slice(0, outSize);

            for (int y = 0; y < height; y++)
            {
                src.Slice(y * srcStride, srcStride).CopyTo(dst.Slice(dstStart + y * dstStride, srcStride));
            }

            return new RentedBuffer(dst, bufferIndex);
        }

        /// <summary>
        /// Reads a block linear plane from GPU memory and converts it to a linear layout in a rented buffer.
        /// </summary>
        /// <param name="rm">Resource manager providing the memory manager and the buffer pool</param>
        /// <param name="offset">Plane offset, extended to an address with ExtendOffset</param>
        /// <param name="width">Plane width in pixels</param>
        /// <param name="height">Plane height in rows</param>
        /// <param name="dstStart">Byte offset of the first row inside the destination buffer</param>
        /// <param name="dstStride">Distance in bytes between two destination rows</param>
        /// <param name="bytesPerPixel">Bytes per pixel of the plane</param>
        /// <param name="gobBlocksInY">Block height in GOBs</param>
        /// <returns>Rented buffer of <c>dstStride * height</c> bytes with the plane data</returns>
        private static RentedBuffer ReadBufferBlockLinear(
            ResourceManager rm,
            uint offset,
            int width,
            int height,
            int dstStart,
            int dstStride,
            int bytesPerPixel,
            int gobBlocksInY)
        {
            int inSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY);

            ReadOnlySpan<byte> src = rm.Gmm.GetSpan(ExtendOffset(offset), inSize);

            int outSize = dstStride * height;
            int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer);

            // The rented array may be larger than requested; only expose the part in use.
            Span<byte> dst = buffer;
            dst = dst.Slice(0, outSize);

            LayoutConverter.ConvertBlockLinearToLinear(dst.Slice(dstStart), width, height, dstStride, bytesPerPixel, gobBlocksInY, src);

            return new RentedBuffer(dst, bufferIndex);
        }
    }
}