aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs56
-rw-r--r--Ryujinx.Graphics.Texture/BlockLinearLayout.cs94
-rw-r--r--Ryujinx.Graphics.Texture/Bpp12Pixel.cs11
-rw-r--r--Ryujinx.Graphics.Texture/LayoutConverter.cs194
-rw-r--r--Ryujinx.Graphics.Texture/OffsetCalculator.cs64
5 files changed, 357 insertions, 62 deletions
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs b/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
index 7244db32..2e6fe0ab 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
@@ -1,6 +1,7 @@
using Ryujinx.Graphics.Gpu.State;
using Ryujinx.Graphics.Texture;
using System;
+using System.Runtime.Intrinsics;
namespace Ryujinx.Graphics.Gpu.Engine
{
@@ -56,19 +57,58 @@ namespace Ryujinx.Graphics.Gpu.Engine
ulong srcBaseAddress = _context.MemoryManager.Translate(cbp.SrcAddress.Pack());
ulong dstBaseAddress = _context.MemoryManager.Translate(cbp.DstAddress.Pack());
- for (int y = 0; y < cbp.YCount; y++)
- for (int x = 0; x < cbp.XCount; x++)
+ (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, cbp.XCount, cbp.YCount);
+ (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, cbp.XCount, cbp.YCount);
+
+ ReadOnlySpan<byte> srcSpan = _context.PhysicalMemory.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize);
+ Span<byte> dstSpan = _context.PhysicalMemory.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();
+
+ bool completeSource = src.RegionX == 0 && src.RegionY == 0 && src.Width == cbp.XCount && src.Height == cbp.YCount;
+ bool completeDest = dst.RegionX == 0 && dst.RegionY == 0 && dst.Width == cbp.XCount && dst.Height == cbp.YCount;
+
+ if (completeSource && completeDest && srcCalculator.LayoutMatches(dstCalculator))
{
- int srcOffset = srcCalculator.GetOffset(src.RegionX + x, src.RegionY + y);
- int dstOffset = dstCalculator.GetOffset(dst.RegionX + x, dst.RegionY + y);
+ srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
+ }
+ else
+ {
+ unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
+ {
+ fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
+ {
+ byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
+ byte* srcBase = srcPtr - srcBaseOffset;
+
+ for (int y = 0; y < cbp.YCount; y++)
+ {
+ srcCalculator.SetY(src.RegionY + y);
+ dstCalculator.SetY(dst.RegionY + y);
- ulong srcAddress = srcBaseAddress + (ulong)srcOffset;
- ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
+ for (int x = 0; x < cbp.XCount; x++)
+ {
+ int srcOffset = srcCalculator.GetOffset(src.RegionX + x);
+ int dstOffset = dstCalculator.GetOffset(dst.RegionX + x);
- ReadOnlySpan<byte> pixel = _context.PhysicalMemory.GetSpan(srcAddress, srcBpp);
+ *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
+ }
+ }
+ }
+ return true;
+ }
- _context.PhysicalMemory.Write(dstAddress, pixel);
+ // Dispatches to Convert<T> with an unmanaged type whose size equals the bytes per pixel,
+ // so each pixel is moved with a single typed load/store.
+ // A switch expression must produce a value, hence Convert returns a dummy bool that is discarded here.
+ bool _ = srcBpp switch
+ {
+     1 => Convert<byte>(dstSpan, srcSpan),
+     2 => Convert<ushort>(dstSpan, srcSpan),
+     4 => Convert<uint>(dstSpan, srcSpan),
+     8 => Convert<ulong>(dstSpan, srcSpan),
+     12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
+     16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
+     // Inside an interpolated string the hole is just {expr}; a leading '$' would be printed literally.
+     _ => throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.")
+ };
}
+
+ _context.PhysicalMemory.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
}
else
{
diff --git a/Ryujinx.Graphics.Texture/BlockLinearLayout.cs b/Ryujinx.Graphics.Texture/BlockLinearLayout.cs
index b95db702..0b112242 100644
--- a/Ryujinx.Graphics.Texture/BlockLinearLayout.cs
+++ b/Ryujinx.Graphics.Texture/BlockLinearLayout.cs
@@ -33,6 +33,11 @@ namespace Ryujinx.Graphics.Texture
private int _robSize;
private int _sliceSize;
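+ // State for the built-in iteration: SetY/SetZ store the partial offsets here, and the GetOffset variants that take only x read them back.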
+ private int _yPart;
+ private int _yzPart;
+ private int _zPart;
+
public BlockLinearLayout(
int width,
int height,
@@ -97,5 +102,94 @@ namespace Ryujinx.Graphics.Texture
return offset;
}
+
+ /// <summary>
+ /// Computes the byte range enclosing a 2D rectangle of pixels, using z = 0.
+ /// </summary>
+ /// <param name="x">X coordinate of the first pixel of the rectangle</param>
+ /// <param name="y">Y coordinate of the first pixel of the rectangle</param>
+ /// <param name="width">Width of the rectangle</param>
+ /// <param name="height">Height of the rectangle</param>
+ /// <returns>Offset of the first pixel, and the byte count from there through the end of the last pixel</returns>
+ public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
+ {
+     // Why checking two corners is enough:
+     // the 2D offset is the sum of an x part and a y part that never share bits,
+     // and each part increases with its own coordinate. The smallest x and y therefore
+     // give the lowest offset of the rectangle, and the largest x and y give the highest.
+     int firstOffset = GetOffset(x, y, 0);
+     int lastOffset = GetOffset(x + width - 1, y + height - 1, 0);
+
+     // _texBpp is added so that the range also covers the last pixel itself.
+     return (firstOffset, lastOffset + _texBpp - firstOffset);
+ }
+
+ /// <summary>
+ /// Checks whether another layout has the same _robSize, _sliceSize, _texBpp, _bhMask and _bdMask as this one.
+ /// </summary>
+ /// <param name="other">Layout to compare against</param>
+ /// <returns>True if all five values are equal, false otherwise</returns>
+ public bool LayoutMatches(BlockLinearLayout other)
+ {
+     if (_robSize != other._robSize || _sliceSize != other._sliceSize)
+     {
+         return false;
+     }
+
+     if (_texBpp != other._texBpp)
+     {
+         return false;
+     }
+
+     return _bhMask == other._bhMask && _bdMask == other._bdMask;
+ }
+
+ // Functions for the built-in iteration.
+ // The Y and Z components of the offset are computed once by SetY/SetZ and then added to each X component, which avoids recomputing them for every pixel.
+
+ /// <summary>
+ /// Precomputes the Y component of the offset for the given row and refreshes the combined Y+Z component.
+ /// </summary>
+ /// <param name="y">Row to select</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void SetY(int y)
+ {
+     int gobRow = y / GobHeight;
+
+     // Coarse part, derived from the GOB row index.
+     int robPart = (gobRow >> _bhShift) * _robSize;
+     int gobPart = (gobRow & _bhMask) * GobSize;
+
+     // Fine part: y bits 1-2 land on offset bits 6-7, y bit 0 lands on offset bit 4.
+     int rowHighPart = ((y & 0x07) >> 1) << 6;
+     int rowLowPart = ((y & 0x01) >> 0) << 4;
+
+     int yOffset = robPart + gobPart + rowHighPart + rowLowPart;
+
+     _yPart = yOffset;
+     _yzPart = yOffset + _zPart;
+ }
+
+ /// <summary>
+ /// Precomputes the Z component of the offset for the given slice and refreshes the combined Y+Z component.
+ /// </summary>
+ /// <param name="z">Depth slice to select</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void SetZ(int z)
+ {
+     int slicePart = (z >> _bdShift) * _sliceSize;
+     int gobPart = ((z & _bdMask) * GobSize) << _bhShift;
+
+     int zOffset = slicePart + gobPart;
+
+     _zPart = zOffset;
+     _yzPart = zOffset + _yPart;
+ }
+
+ /// <summary>
+ /// Fast conversion from a byte offset within a line to an absolute offset, for the row and slice last selected with SetY and SetZ.
+ /// Only 16-byte granularity is resolved, so the input x must be divisible by 16.
+ /// </summary>
+ /// <param name="x">Offset within the line, in bytes</param>
+ /// <returns>Absolute offset, including the precomputed Y and Z components</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int GetOffsetWithLineOffset16(int x)
+ {
+     int columnPart = (x / GobStride) << _xShift;
+
+     // x bit 5 lands on offset bit 8, x bit 4 lands on offset bit 5.
+     int bit5Part = ((x & 0x3f) >> 5) << 8;
+     int bit4Part = ((x & 0x1f) >> 4) << 5;
+
+     return columnPart + bit5Part + bit4Part + _yzPart;
+ }
+
+ /// <summary>
+ /// Optimized conversion for line offset in bytes to an absolute offset, relative to the row and slice last selected through SetY and SetZ. Input x must be divisible by 64.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int GetOffsetWithLineOffset64(int x)
+ {
+ int offset = (x / GobStride) << _xShift; // Only the GobStride-sized column index contributes; no finer x bits are folded in here.
+
+ return offset + _yzPart; // _yzPart is the combined Y and Z contribution maintained by SetY/SetZ.
+ }
+
+ /// <summary>
+ /// Converts an X coordinate to an absolute offset, for the row and slice last selected with SetY and SetZ.
+ /// </summary>
+ /// <param name="x">X coordinate; it is shifted left by _bppShift to obtain the byte offset within the line</param>
+ /// <returns>Absolute offset, including the precomputed Y and Z components</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int GetOffset(int x)
+ {
+     int lineOffset = x << _bppShift;
+
+     int columnPart = (lineOffset / GobStride) << _xShift;
+
+     // Line offset bit 5 lands on offset bit 8, bit 4 on offset bit 5, and the low 4 bits are kept as they are.
+     int bit5Part = ((lineOffset & 0x3f) >> 5) << 8;
+     int bit4Part = ((lineOffset & 0x1f) >> 4) << 5;
+     int lowPart = lineOffset & 0x0f;
+
+     return columnPart + bit5Part + bit4Part + lowPart + _yzPart;
+ }
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Texture/Bpp12Pixel.cs b/Ryujinx.Graphics.Texture/Bpp12Pixel.cs
new file mode 100644
index 00000000..5a38259e
--- /dev/null
+++ b/Ryujinx.Graphics.Texture/Bpp12Pixel.cs
@@ -0,0 +1,11 @@
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Texture
+{
+ [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 12)] // Fields in declaration order, packed to 1 byte, total size fixed at 12 bytes.
+ public struct Bpp12Pixel // Opaque 12-byte value; the fields are never accessed by name here, they only give the struct its size.
+ {
+ private ulong _elem1; // First 8 bytes.
+ private uint _elem2; // Remaining 4 bytes.
+ }
+}
diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs
index ce2b37b5..525271c4 100644
--- a/Ryujinx.Graphics.Texture/LayoutConverter.cs
+++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs
@@ -1,6 +1,6 @@
using Ryujinx.Common;
using System;
-
+using System.Runtime.Intrinsics;
using static Ryujinx.Graphics.Texture.BlockLinearConstants;
namespace Ryujinx.Graphics.Texture
@@ -64,11 +64,14 @@ namespace Ryujinx.Graphics.Texture
}
int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
+ int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);
int xStart = strideTrunc / bytesPerPixel;
int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
+ int outStrideGap = stride - w * bytesPerPixel;
+
int alignment = gobWidth;
if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
@@ -86,36 +89,74 @@ namespace Ryujinx.Graphics.Texture
mipGobBlocksInZ,
bytesPerPixel);
- for (int layer = 0; layer < layers; layer++)
+ unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
{
- int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);
-
- for (int z = 0; z < d; z++)
- for (int y = 0; y < h; y++)
+ fixed (byte* outputPtr = output, dataPtr = data)
{
- for (int x = 0; x < strideTrunc; x += 16)
+ byte* outPtr = outputPtr + outOffs;
+ for (int layer = 0; layer < layers; layer++)
{
- int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x, y, z);
-
- Span<byte> dest = output.Slice(outOffs + x, 16);
-
- data.Slice(offset, 16).CopyTo(dest);
- }
-
- for (int x = xStart; x < w; x++)
- {
- int offset = inBaseOffset + layoutConverter.GetOffset(x, y, z);
-
- Span<byte> dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel);
-
- data.Slice(offset, bytesPerPixel).CopyTo(dest);
+ byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));
+
+ for (int z = 0; z < d; z++)
+ {
+ layoutConverter.SetZ(z);
+ for (int y = 0; y < h; y++)
+ {
+ layoutConverter.SetY(y);
+
+ for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
+ {
+ byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
+ byte* offset2 = offset + 0x20;
+ byte* offset3 = offset + 0x100;
+ byte* offset4 = offset + 0x120;
+
+ Vector128<byte> value = *(Vector128<byte>*)offset;
+ Vector128<byte> value2 = *(Vector128<byte>*)offset2;
+ Vector128<byte> value3 = *(Vector128<byte>*)offset3;
+ Vector128<byte> value4 = *(Vector128<byte>*)offset4;
+
+ *(Vector128<byte>*)outPtr = value;
+ *(Vector128<byte>*)(outPtr + 16) = value2;
+ *(Vector128<byte>*)(outPtr + 32) = value3;
+ *(Vector128<byte>*)(outPtr + 48) = value4;
+ }
+
+ for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
+ {
+ byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
+
+ *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
+ }
+
+ for (int x = xStart; x < w; x++, outPtr += bytesPerPixel)
+ {
+ byte* offset = inBaseOffset + layoutConverter.GetOffset(x);
+
+ *(T*)outPtr = *(T*)offset;
+ }
+
+ outPtr += outStrideGap;
+ }
+ }
}
-
- outOffs += stride;
+ outOffs += stride * h * d * layers;
}
+ return true;
}
- }
+ // Dispatches to Convert<T> with an unmanaged type whose size equals the bytes per pixel.
+ // A switch expression must produce a value, hence Convert returns a dummy bool that is discarded here.
+ bool _ = bytesPerPixel switch
+ {
+     1 => Convert<byte>(output, data),
+     2 => Convert<ushort>(output, data),
+     4 => Convert<uint>(output, data),
+     8 => Convert<ulong>(output, data),
+     12 => Convert<Bpp12Pixel>(output, data),
+     16 => Convert<Vector128<byte>>(output, data),
+     // Inside an interpolated string the hole is just {expr}; a leading '$' would be printed literally.
+     _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
+ };
+ }
return output;
}
@@ -132,22 +173,18 @@ namespace Ryujinx.Graphics.Texture
int h = BitUtils.DivRoundUp(height, blockHeight);
int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
+ int lineSize = w * bytesPerPixel;
Span<byte> output = new byte[h * outStride];
int outOffs = 0;
+ int inOffs = 0;
for (int y = 0; y < h; y++)
{
- for (int x = 0; x < w; x++)
- {
- int offset = y * stride + x * bytesPerPixel;
-
- Span<byte> dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel);
-
- data.Slice(offset, bytesPerPixel).CopyTo(dest);
- }
+ data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));
+ inOffs += stride;
outOffs += outStride;
}
@@ -198,8 +235,15 @@ namespace Ryujinx.Graphics.Texture
mipGobBlocksInZ >>= 1;
}
+ int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
+ int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);
+
+ int xStart = strideTrunc / bytesPerPixel;
+
int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
+ int inStrideGap = stride - w * bytesPerPixel;
+
int alignment = gobWidth;
if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
@@ -217,25 +261,73 @@ namespace Ryujinx.Graphics.Texture
mipGobBlocksInZ,
bytesPerPixel);
- for (int layer = 0; layer < layers; layer++)
+ unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
{
- int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);
-
- for (int z = 0; z < d; z++)
- for (int y = 0; y < h; y++)
+ fixed (byte* outputPtr = output, dataPtr = data)
{
- for (int x = 0; x < w; x++)
+ byte* inPtr = dataPtr + inOffs;
+ for (int layer = 0; layer < layers; layer++)
{
- int offset = outBaseOffset + layoutConverter.GetOffset(x, y, z);
-
- Span<byte> dest = output.Slice(offset, bytesPerPixel);
-
- data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(dest);
+ byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));
+
+ for (int z = 0; z < d; z++)
+ {
+ layoutConverter.SetZ(z);
+ for (int y = 0; y < h; y++)
+ {
+ layoutConverter.SetY(y);
+
+ for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
+ {
+ byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
+ byte* offset2 = offset + 0x20;
+ byte* offset3 = offset + 0x100;
+ byte* offset4 = offset + 0x120;
+
+ Vector128<byte> value = *(Vector128<byte>*)inPtr;
+ Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
+ Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
+ Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);
+
+ *(Vector128<byte>*)offset = value;
+ *(Vector128<byte>*)offset2 = value2;
+ *(Vector128<byte>*)offset3 = value3;
+ *(Vector128<byte>*)offset4 = value4;
+ }
+
+ for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
+ {
+ byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
+
+ *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
+ }
+
+ for (int x = xStart; x < w; x++, inPtr += bytesPerPixel)
+ {
+ byte* offset = outBaseOffset + layoutConverter.GetOffset(x);
+
+ *(T*)offset = *(T*)inPtr;
+ }
+
+ inPtr += inStrideGap;
+ }
+ }
}
-
- inOffs += stride;
+ inOffs += stride * h * d * layers;
}
+ return true;
}
+
+ // Dispatches to Convert<T> with an unmanaged type whose size equals the bytes per pixel.
+ // A switch expression must produce a value, hence Convert returns a dummy bool that is discarded here.
+ bool _ = bytesPerPixel switch
+ {
+     1 => Convert<byte>(output, data),
+     2 => Convert<ushort>(output, data),
+     4 => Convert<uint>(output, data),
+     8 => Convert<ulong>(output, data),
+     12 => Convert<Bpp12Pixel>(output, data),
+     16 => Convert<Vector128<byte>>(output, data),
+     // Inside an interpolated string the hole is just {expr}; a leading '$' would be printed literally.
+     _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
+ };
}
return output;
@@ -254,23 +346,19 @@ namespace Ryujinx.Graphics.Texture
int h = BitUtils.DivRoundUp(height, blockHeight);
int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
+ int lineSize = width * bytesPerPixel;
Span<byte> output = new byte[h * stride];
int inOffs = 0;
+ int outOffs = 0;
for (int y = 0; y < h; y++)
{
- for (int x = 0; x < w; x++)
- {
- int offset = y * stride + x * bytesPerPixel;
-
- Span<byte> dest = output.Slice(offset, bytesPerPixel);
-
- data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(dest);
- }
+ data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));
inOffs += inStride;
+ outOffs += stride;
}
return output;
diff --git a/Ryujinx.Graphics.Texture/OffsetCalculator.cs b/Ryujinx.Graphics.Texture/OffsetCalculator.cs
index bb5d606c..1f5d9614 100644
--- a/Ryujinx.Graphics.Texture/OffsetCalculator.cs
+++ b/Ryujinx.Graphics.Texture/OffsetCalculator.cs
@@ -1,17 +1,22 @@
using Ryujinx.Common;
-
+using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Texture.BlockLinearConstants;
namespace Ryujinx.Graphics.Texture
{
public class OffsetCalculator
{
+ private int _width;
+ private int _height;
private int _stride;
private bool _isLinear;
private int _bytesPerPixel;
private BlockLinearLayout _layoutConverter;
+ // State for the built-in iteration: SetY stores the row offset of a linear layout here, and GetOffset(int x) adds it back.
+ private int _yPart;
+
public OffsetCalculator(
int width,
int height,
@@ -20,6 +25,8 @@ namespace Ryujinx.Graphics.Texture
int gobBlocksInY,
int bytesPerPixel)
{
+ _width = width;
+ _height = height;
_stride = stride;
_isLinear = isLinear;
_bytesPerPixel = bytesPerPixel;
@@ -40,6 +47,18 @@ namespace Ryujinx.Graphics.Texture
}
}
+ /// <summary>
+ /// Selects the row that later calls to GetOffset(int x) are resolved against.
+ /// </summary>
+ /// <param name="y">Row to select</param>
+ public void SetY(int y)
+ {
+     if (!_isLinear)
+     {
+         // Non-linear layouts keep the row state inside the layout converter.
+         _layoutConverter.SetY(y);
+
+         return;
+     }
+
+     _yPart = _stride * y;
+ }
+
public int GetOffset(int x, int y)
{
if (_isLinear)
@@ -51,5 +70,48 @@ namespace Ryujinx.Graphics.Texture
return _layoutConverter.GetOffset(x, y, 0);
}
}
+
+ /// <summary>
+ /// Gets the offset of a pixel on the row last selected with SetY.
+ /// </summary>
+ /// <param name="x">X coordinate of the pixel</param>
+ /// <returns>Offset of the pixel: row offset plus x times bytes per pixel when linear, otherwise the layout converter's result</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int GetOffset(int x)
+ {
+     return _isLinear
+         ? x * _bytesPerPixel + _yPart
+         : _layoutConverter.GetOffset(x);
+ }
+
+ /// <summary>
+ /// Computes the byte range enclosing a rectangle of pixels.
+ /// </summary>
+ /// <param name="x">X coordinate of the first pixel of the rectangle</param>
+ /// <param name="y">Y coordinate of the first pixel of the rectangle</param>
+ /// <param name="width">Width of the rectangle</param>
+ /// <param name="height">Height of the rectangle</param>
+ /// <returns>Offset of the first pixel, and the byte count from there to the end of the last row of the rectangle</returns>
+ public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
+ {
+     if (!_isLinear)
+     {
+         return _layoutConverter.GetRectangleRange(x, y, width, height);
+     }
+
+     int firstByte = y * _stride + x * _bytesPerPixel;
+
+     // The range ends right after the last pixel of the last row.
+     int lastRow = y + height - 1;
+     int endByte = lastRow * _stride + (x + width) * _bytesPerPixel;
+
+     return (firstByte, endByte - firstByte);
+ }
+
+ /// <summary>
+ /// Checks whether another calculator describes the same layout as this one.
+ /// </summary>
+ /// <param name="other">Calculator to compare against</param>
+ /// <returns>True if both are linear with equal width, height, stride and bytes per pixel, or both are non-linear with matching layout converters</returns>
+ public bool LayoutMatches(OffsetCalculator other)
+ {
+     // A linear layout never matches a non-linear one.
+     if (_isLinear != other._isLinear)
+     {
+         return false;
+     }
+
+     if (!_isLinear)
+     {
+         return _layoutConverter.LayoutMatches(other._layoutConverter);
+     }
+
+     bool sameSize = _width == other._width && _height == other._height;
+
+     return sameSize && _stride == other._stride && _bytesPerPixel == other._bytesPerPixel;
+ }
}
} \ No newline at end of file