aboutsummaryrefslogtreecommitdiff
path: root/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
diff options
context:
space:
mode:
author gdkchan <gab.dark.100@gmail.com> 2021-08-04 17:20:58 -0300
committer GitHub <noreply@github.com> 2021-08-04 22:20:58 +0200
commit ff5df5d8a1fec6947f7feed3ec3ca0889cd892a5 (patch)
tree 0a26c48d35bbfd88637ecccc7e837e0d2461a029 /Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
parent ff8849671af5ac14fc9cc9d37da30f53d3f13d89 (diff)
Support non-contiguous copies on I2M and DMA engines (#2473)
* Support non-contiguous copies on I2M and DMA engines
* Vector copy should start aligned on I2M
* Nits
* Zero extend the offset
Diffstat (limited to 'Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs')
-rw-r--r-- Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs 42
1 files changed, 25 insertions, 17 deletions
diff --git a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
index 186a4648..81c5ad77 100644
--- a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
@@ -4,6 +4,7 @@ using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
{
@@ -169,13 +170,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
/// </summary>
private void FinishTransfer()
{
- Span<byte> data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
+ var memoryManager = _channel.MemoryManager;
+
+ var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
if (_isLinear && _lineCount == 1)
{
- ulong address = _channel.MemoryManager.Translate(_dstGpuVa);
-
- _channel.MemoryManager.Physical.Write(address, data);
+ memoryManager.Write(_dstGpuVa, data);
}
else
{
@@ -189,36 +190,43 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
int srcOffset = 0;
- ulong dstBaseAddress = _channel.MemoryManager.Translate(_dstGpuVa);
-
for (int y = _dstY; y < _dstY + _lineCount; y++)
{
int x1 = _dstX;
int x2 = _dstX + _lineLengthIn;
- int x2Trunc = _dstX + BitUtils.AlignDown(_lineLengthIn, 16);
+ int x1Round = BitUtils.AlignUp(_dstX, 16);
+ int x2Trunc = BitUtils.AlignDown(x2, 16);
- int x;
+ int x = x1;
- for (x = x1; x < x2Trunc; x += 16, srcOffset += 16)
+ if (x1Round <= x2)
{
- int dstOffset = dstCalculator.GetOffset(x, y);
+ for (; x < x1Round; x++, srcOffset++)
+ {
+ int dstOffset = dstCalculator.GetOffset(x, y);
+
+ ulong dstAddress = _dstGpuVa + (uint)dstOffset;
- ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
+ memoryManager.Write(dstAddress, data[srcOffset]);
+ }
+ }
- Span<byte> pixel = data.Slice(srcOffset, 16);
+ for (; x < x2Trunc; x += 16, srcOffset += 16)
+ {
+ int dstOffset = dstCalculator.GetOffset(x, y);
- _channel.MemoryManager.Physical.Write(dstAddress, pixel);
+ ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+ memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
}
for (; x < x2; x++, srcOffset++)
{
int dstOffset = dstCalculator.GetOffset(x, y);
- ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
-
- Span<byte> pixel = data.Slice(srcOffset, 1);
+ ulong dstAddress = _dstGpuVa + (uint)dstOffset;
- _channel.MemoryManager.Physical.Write(dstAddress, pixel);
+ memoryManager.Write(dstAddress, data[srcOffset]);
}
}
}