about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Ryujinx.Graphics.Gpu/Constants.cs | 5
-rw-r--r-- Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs | 45
-rw-r--r-- Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs | 8
-rw-r--r-- Ryujinx.Graphics.Gpu/Memory/BufferManager.cs | 46
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs | 4
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs | 6
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs | 2
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs | 6
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs | 10
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs | 10
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 8
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs | 5
-rw-r--r-- Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs | 13
-rw-r--r-- Ryujinx.Graphics.Shader/Constants.cs | 2
-rw-r--r-- Ryujinx.Graphics.Shader/IGpuAccessor.cs | 9
-rw-r--r-- Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs | 206
16 files changed, 317 insertions, 68 deletions
diff --git a/Ryujinx.Graphics.Gpu/Constants.cs b/Ryujinx.Graphics.Gpu/Constants.cs
index d580049f..1897f5d0 100644
--- a/Ryujinx.Graphics.Gpu/Constants.cs
+++ b/Ryujinx.Graphics.Gpu/Constants.cs
@@ -95,5 +95,10 @@ namespace Ryujinx.Graphics.Gpu
/// Byte alignment for block linear textures
/// </summary>
public const int GobAlignment = 64;
+
+ /// <summary>
+ /// Expected byte alignment for storage buffers
+ /// </summary>
+ public const int StorageAlignment = 16;
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
index bc291174..cd509471 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
@@ -138,7 +138,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
qmd.CtaThreadDimension1,
qmd.CtaThreadDimension2,
localMemorySize,
- sharedMemorySize);
+ sharedMemorySize,
+ _channel.BufferManager.HasUnalignedStorageBuffers);
CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
@@ -150,6 +151,33 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
ShaderProgramInfo info = cs.Shaders[0].Info;
+ bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
+
+ for (int index = 0; index < info.SBuffers.Count; index++)
+ {
+ BufferDescriptor sb = info.SBuffers[index];
+
+ ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
+
+ int sbDescOffset = 0x310 + sb.Slot * 0x10;
+
+ sbDescAddress += (ulong)sbDescOffset;
+
+ SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
+
+ _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
+ }
+
+ if ((_channel.BufferManager.HasUnalignedStorageBuffers) != hasUnaligned)
+ {
+ // Refetch the shader, as assumptions about storage buffer alignment have changed.
+ cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
+
+ _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
+
+ info = cs.Shaders[0].Info;
+ }
+
for (int index = 0; index < info.CBuffers.Count; index++)
{
BufferDescriptor cb = info.CBuffers[index];
@@ -174,21 +202,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
_channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
}
- for (int index = 0; index < info.SBuffers.Count; index++)
- {
- BufferDescriptor sb = info.SBuffers[index];
-
- ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
-
- int sbDescOffset = 0x310 + sb.Slot * 0x10;
-
- sbDescAddress += (ulong)sbDescOffset;
-
- SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
-
- _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
- }
-
_channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
_channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);
diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
index 3f71172c..d51077dc 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
@@ -293,9 +293,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// </summary>
private void CommitBindings()
{
+ var buffers = _channel.BufferManager;
+ var hasUnaligned = buffers.HasUnalignedStorageBuffers;
+
UpdateStorageBuffers();
- if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState))
+ if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState) || (buffers.HasUnalignedStorageBuffers != hasUnaligned))
{
// Shader must be reloaded.
UpdateShaderState();
@@ -1361,7 +1364,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_state.State.AlphaTestFunc,
_state.State.AlphaTestRef,
ref attributeTypes,
- _drawState.HasConstantBufferDrawParameters);
+ _drawState.HasConstantBufferDrawParameters,
+ _channel.BufferManager.HasUnalignedStorageBuffers);
}
/// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
index 9f1f88b1..1b67f650 100644
--- a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
@@ -17,6 +17,9 @@ namespace Ryujinx.Graphics.Gpu.Memory
private readonly GpuContext _context;
private readonly GpuChannel _channel;
+ private int _unalignedStorageBuffers;
+ public bool HasUnalignedStorageBuffers => _unalignedStorageBuffers > 0;
+
private IndexBuffer _indexBuffer;
private readonly VertexBuffer[] _vertexBuffers;
private readonly BufferBounds[] _transformFeedbackBuffers;
@@ -39,6 +42,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
public BufferBounds[] Buffers { get; }
/// <summary>
+ /// Flag indicating if this binding is unaligned.
+ /// </summary>
+ public bool[] Unaligned { get; }
+
+ /// <summary>
/// Total amount of buffers used on the shader.
/// </summary>
public int Count { get; private set; }
@@ -51,6 +59,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
{
Bindings = new BufferDescriptor[count];
Buffers = new BufferBounds[count];
+ Unaligned = new bool[count];
}
/// <summary>
@@ -203,6 +212,31 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
/// <summary>
+ /// Records the alignment of a storage buffer.
+ /// Unaligned storage buffers disable some optimizations on the shader.
+ /// </summary>
+ /// <param name="buffers">The binding list to modify</param>
+ /// <param name="index">Index of the storage buffer</param>
+ /// <param name="gpuVa">Start GPU virtual address of the buffer</param>
+ private void RecordStorageAlignment(BuffersPerStage buffers, int index, ulong gpuVa)
+ {
+ bool isMisaligned = (gpuVa & (Constants.StorageAlignment - 1)) != 0;
+
+ // Nothing is currently counted as unaligned and this address is aligned as well,
+ // so the bookkeeping cannot change.
+ if (!isMisaligned && !HasUnalignedStorageBuffers)
+ {
+ return;
+ }
+
+ bool[] unalignedFlags = buffers.Unaligned;
+
+ // Only a change of alignment for this binding affects the counter.
+ if (unalignedFlags[index] == isMisaligned)
+ {
+ return;
+ }
+
+ unalignedFlags[index] = isMisaligned;
+
+ if (isMisaligned)
+ {
+ _unalignedStorageBuffers++;
+ }
+ else
+ {
+ _unalignedStorageBuffers--;
+ }
+ }
+
+ /// <summary>
/// Sets a storage buffer on the compute pipeline.
/// Storage buffers can be read and written to on shaders.
/// </summary>
@@ -214,6 +248,8 @@ namespace Ryujinx.Graphics.Gpu.Memory
{
size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
+ RecordStorageAlignment(_cpStorageBuffers, index, gpuVa);
+
gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);
ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size);
@@ -234,17 +270,21 @@ namespace Ryujinx.Graphics.Gpu.Memory
{
size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
+ BuffersPerStage buffers = _gpStorageBuffers[stage];
+
+ RecordStorageAlignment(buffers, index, gpuVa);
+
gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);
ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size);
- if (_gpStorageBuffers[stage].Buffers[index].Address != address ||
- _gpStorageBuffers[stage].Buffers[index].Size != size)
+ if (buffers.Buffers[index].Address != address ||
+ buffers.Buffers[index].Size != size)
{
_gpStorageBuffersDirty = true;
}
- _gpStorageBuffers[stage].SetBounds(index, address, size, flags);
+ buffers.SetBounds(index, address, size, flags);
}
/// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs b/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs
index 08154df3..a6718211 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs
@@ -36,6 +36,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
+ /// <param name="computeState">Compute state</param>
/// <param name="gpuVa">GPU virtual address of the compute shader</param>
/// <param name="program">Cached host program for the given state, if found</param>
/// <param name="cachedGuestCode">Cached guest code, if any found</param>
@@ -43,6 +44,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool TryFind(
GpuChannel channel,
GpuChannelPoolState poolState,
+ GpuChannelComputeState computeState,
ulong gpuVa,
out CachedShaderProgram program,
out byte[] cachedGuestCode)
@@ -50,7 +52,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
program = null;
ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa);
bool hasSpecList = _cache.TryFindItem(codeAccessor, out var specList, out cachedGuestCode);
- return hasSpecList && specList.TryFindForCompute(channel, poolState, out program);
+ return hasSpecList && specList.TryFindForCompute(channel, poolState, computeState, out program);
}
/// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
index 98748bf6..c567c2c0 100644
--- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
@@ -226,6 +226,12 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
}
/// <inheritdoc/>
+ public bool QueryHasUnalignedStorageBuffer()
+ {
+ // Either the graphics or the compute portion of the old state may carry the flag.
+ if (_oldSpecState.GraphicsState.HasUnalignedStorageBuffer)
+ {
+ return true;
+ }
+
+ return _oldSpecState.ComputeState.HasUnalignedStorageBuffer;
+ }
+
+ /// <inheritdoc/>
public bool QueryViewportTransformDisable()
{
return _oldSpecState.GraphicsState.ViewportTransformDisable;
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index 0bdf4949..e23b4d50 100644
--- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
- private const uint CodeGenVersion = 3747;
+ private const uint CodeGenVersion = 3848;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";
diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
index b8cb1107..28ea430c 100644
--- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
@@ -146,6 +146,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
}
/// <inheritdoc/>
+ public bool QueryHasUnalignedStorageBuffer()
+ {
+ // Either the graphics or the compute portion of the state may carry the flag.
+ if (_state.GraphicsState.HasUnalignedStorageBuffer)
+ {
+ return true;
+ }
+
+ return _state.ComputeState.HasUnalignedStorageBuffer;
+ }
+
+ /// <inheritdoc/>
public InputTopology QueryPrimitiveTopology()
{
_state.SpecializationState?.RecordPrimitiveTopology();
diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs
index 89a3db71..356d3f3e 100644
--- a/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs
@@ -33,6 +33,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
public readonly int SharedMemorySize;
/// <summary>
+ /// Indicates that any storage buffer use is unaligned.
+ /// </summary>
+ public readonly bool HasUnalignedStorageBuffer;
+
+ /// <summary>
/// Creates a new GPU compute state.
/// </summary>
/// <param name="localSizeX">Local group size X of the compute shader</param>
@@ -40,18 +45,21 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="localSizeZ">Local group size Z of the compute shader</param>
/// <param name="localMemorySize">Local memory size of the compute shader</param>
/// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
+ /// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
public GpuChannelComputeState(
int localSizeX,
int localSizeY,
int localSizeZ,
int localMemorySize,
- int sharedMemorySize)
+ int sharedMemorySize,
+ bool hasUnalignedStorageBuffer)
{
LocalSizeX = localSizeX;
LocalSizeY = localSizeY;
LocalSizeZ = localSizeZ;
LocalMemorySize = localMemorySize;
SharedMemorySize = sharedMemorySize;
+ HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
}
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs
index b0727677..511f4c23 100644
--- a/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs
@@ -83,6 +83,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
public readonly bool HasConstantBufferDrawParameters;
/// <summary>
+ /// Indicates that any storage buffer use is unaligned.
+ /// </summary>
+ public readonly bool HasUnalignedStorageBuffer;
+
+ /// <summary>
/// Creates a new GPU graphics state.
/// </summary>
/// <param name="earlyZForce">Early Z force enable</param>
@@ -99,6 +104,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="alphaTestReference">When alpha test is enabled, indicates the value to compare with the fragment output alpha</param>
/// <param name="attributeTypes">Type of the vertex attributes consumed by the shader</param>
/// <param name="hasConstantBufferDrawParameters">Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0</param>
+ /// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
public GpuChannelGraphicsState(
bool earlyZForce,
PrimitiveTopology topology,
@@ -113,7 +119,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
CompareOp alphaTestCompare,
float alphaTestReference,
ref Array32<AttributeType> attributeTypes,
- bool hasConstantBufferDrawParameters)
+ bool hasConstantBufferDrawParameters,
+ bool hasUnalignedStorageBuffer)
{
EarlyZForce = earlyZForce;
Topology = topology;
@@ -129,6 +136,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
AlphaTestReference = alphaTestReference;
AttributeTypes = attributeTypes;
HasConstantBufferDrawParameters = hasConstantBufferDrawParameters;
+ HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
}
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index 1803dae6..2a9dd6a5 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -203,12 +203,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
GpuChannelComputeState computeState,
ulong gpuVa)
{
- if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa))
+ if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, computeState, cpShader, gpuVa))
{
return cpShader;
}
- if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode))
+ if (_computeShaderCache.TryFind(channel, poolState, computeState, gpuVa, out cpShader, out byte[] cachedGuestCode))
{
_cpPrograms[gpuVa] = cpShader;
return cpShader;
@@ -473,18 +473,20 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
+ /// <param name="computeState">GPU channel compute state to verify shader compatibility</param>
/// <param name="cpShader">Cached compute shader</param>
/// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
/// <returns>True if the code is different, false otherwise</returns>
private static bool IsShaderEqual(
GpuChannel channel,
GpuChannelPoolState poolState,
+ GpuChannelComputeState computeState,
CachedShaderProgram cpShader,
ulong gpuVa)
{
if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa))
{
- return cpShader.SpecializationState.MatchesCompute(channel, poolState, true);
+ return cpShader.SpecializationState.MatchesCompute(channel, poolState, computeState, true);
}
return false;
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
index abc9d913..cb6ab49a 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
@@ -53,13 +53,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
+ /// <param name="computeState">Compute state</param>
/// <param name="program">Cached program, if found</param>
/// <returns>True if a compatible program is found, false otherwise</returns>
- public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, out CachedShaderProgram program)
+ public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, out CachedShaderProgram program)
{
foreach (var entry in _entries)
{
- if (entry.SpecializationState.MatchesCompute(channel, poolState, true))
+ if (entry.SpecializationState.MatchesCompute(channel, poolState, computeState, true))
{
program = entry;
return true;
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
index 0aecc5b7..8f931507 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
@@ -531,6 +531,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
return false;
}
+ if (graphicsState.HasUnalignedStorageBuffer != GraphicsState.HasUnalignedStorageBuffer)
+ {
+ return false;
+ }
+
return Matches(channel, poolState, checkTextures, isCompute: false);
}
@@ -539,10 +544,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
+ /// <param name="computeState">Compute state</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <returns>True if the state matches, false otherwise</returns>
- public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, bool checkTextures)
+ public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, bool checkTextures)
{
+ if (computeState.HasUnalignedStorageBuffer != ComputeState.HasUnalignedStorageBuffer)
+ {
+ return false;
+ }
+
return Matches(channel, poolState, checkTextures, isCompute: true);
}
diff --git a/Ryujinx.Graphics.Shader/Constants.cs b/Ryujinx.Graphics.Shader/Constants.cs
index 7f1445ed..c6f9ef49 100644
--- a/Ryujinx.Graphics.Shader/Constants.cs
+++ b/Ryujinx.Graphics.Shader/Constants.cs
@@ -10,5 +10,7 @@ namespace Ryujinx.Graphics.Shader
public const int NvnBaseVertexByteOffset = 0x640;
public const int NvnBaseInstanceByteOffset = 0x644;
public const int NvnDrawIndexByteOffset = 0x648;
+
+ public const int StorageAlignment = 16;
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index 4f800a14..f05a8527 100644
--- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -178,6 +178,15 @@ namespace Ryujinx.Graphics.Shader
}
/// <summary>
+ /// Queries whether the current draw uses unaligned storage buffer addresses.
+ /// </summary>
+ /// <returns>True if any storage buffer address is not aligned to 16 bytes, false otherwise</returns>
+ bool QueryHasUnalignedStorageBuffer()
+ {
+ // Default for implementers that do not override this query: report no unaligned storage buffers.
+ const bool hasUnalignedStorageBuffer = false;
+
+ return hasUnalignedStorageBuffer;
+ }
+
+ /// <summary>
/// Queries host about the presence of the FrontFacing built-in variable bug.
/// </summary>
/// <returns>True if the bug is present on the host device used, false otherwise</returns>
diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
index d2200d0b..25c0eb25 100644
--- a/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
@@ -34,7 +34,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
// we can guess which storage buffer it is accessing.
// We can then replace the global memory access with a storage
// buffer access.
- node = ReplaceGlobalWithStorage(node, config, storageIndex);
+ node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
}
else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
{
@@ -54,7 +54,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
}
- private static LinkedListNode<INode> ReplaceGlobalWithStorage(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
+ private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
{
Operation operation = (Operation)node.Value;
@@ -64,42 +64,10 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
config.SetUsedStorageBuffer(storageIndex, isWrite);
- Operand GetStorageOffset()
- {
- Operand addrLow = operation.GetSource(0);
-
- Operand baseAddrLow = Cbuf(0, GetStorageCbOffset(config.Stage, storageIndex));
-
- Operand baseAddrTrunc = Local();
-
- Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
-
- Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
-
- node.List.AddBefore(node, andOp);
-
- Operand byteOffset = Local();
- Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
-
- node.List.AddBefore(node, subOp);
-
- if (isStg16Or8)
- {
- return byteOffset;
- }
-
- Operand wordOffset = Local();
- Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
-
- node.List.AddBefore(node, shrOp);
-
- return wordOffset;
- }
-
Operand[] sources = new Operand[operation.SourcesCount];
sources[0] = Const(storageIndex);
- sources[1] = GetStorageOffset();
+ sources[1] = GetStorageOffset(block, node, config, storageIndex, operation.GetSource(0), isStg16Or8);
for (int index = 2; index < operation.SourcesCount; index++)
{
@@ -144,6 +112,170 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return node;
}
+ /// <summary>
+ /// Emits the operations that compute the storage buffer offset replacing a global memory address.
+ /// </summary>
+ /// <param name="block">Basic block used when searching for the operations that produce the address</param>
+ /// <param name="node">Node of the memory operation being replaced; new operations are inserted before it</param>
+ /// <param name="config">Shader configuration</param>
+ /// <param name="storageIndex">Index of the storage buffer being accessed</param>
+ /// <param name="addrLow">Address operand of the global memory operation</param>
+ /// <param name="isStg16Or8">True to return the offset in bytes, false to return it in 32-bit words</param>
+ /// <returns>Operand with the offset into the storage buffer</returns>
+ private static Operand GetStorageOffset(
+ BasicBlock block,
+ LinkedListNode<INode> node,
+ ShaderConfig config,
+ int storageIndex,
+ Operand addrLow,
+ bool isStg16Or8)
+ {
+ int baseAddressCbOffset = GetStorageCbOffset(config.Stage, storageIndex);
+
+ bool storageAligned = !(config.GpuAccessor.QueryHasUnalignedStorageBuffer() || config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() > Constants.StorageAlignment);
+
+ // Only try to strip the base address from the calculation when the storage buffers are considered aligned.
+ (Operand byteOffset, int constantOffset) = storageAligned ?
+ GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), baseAddressCbOffset) :
+ (null, 0);
+
+ if (byteOffset != null)
+ {
+ // This has to run before the constant offset is folded into byteOffset below:
+ // ReplaceAddressAlignment adds constantOffset on its own, so handing it the
+ // folded value would apply the constant twice.
+ ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset);
+
+ if (constantOffset != 0)
+ {
+ Operand offset = Local();
+ Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));
+
+ node.List.AddBefore(node, addOp);
+
+ byteOffset = offset;
+ }
+ }
+ else
+ {
+ // Fallback: subtract the base address, truncated to the host alignment, from the full address.
+ Operand baseAddrLow = Cbuf(0, baseAddressCbOffset);
+ Operand baseAddrTrunc = Local();
+
+ Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+
+ Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
+
+ node.List.AddBefore(node, andOp);
+
+ Operand offset = Local();
+ Operation subOp = new Operation(Instruction.Subtract, offset, addrLow, baseAddrTrunc);
+
+ node.List.AddBefore(node, subOp);
+
+ byteOffset = offset;
+ }
+
+ if (isStg16Or8)
+ {
+ return byteOffset;
+ }
+
+ // Convert the byte offset into a 32-bit word offset.
+ Operand wordOffset = Local();
+ Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
+
+ node.List.AddBefore(node, shrOp);
+
+ return wordOffset;
+ }
+
+ private static bool IsCb0Offset(Operand operand, int offset)
+ {
+ // Anything that is not a constant buffer operand cannot match.
+ if (operand.Type != OperandType.ConstantBuffer)
+ {
+ return false;
+ }
+
+ return operand.GetCbufSlot() == 0 && operand.GetCbufOffset() == offset;
+ }
+
+ private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
+ {
+ // 16/8-bit LDG comes with extra code that derives the alignment from the address ("address & 3").
+ // Rewrite those uses so they work on the byte offset instead, dropping the storage buffer base address.
+
+ foreach (INode user in address.UseOps)
+ {
+ if (!(user is Operation andOp) || andOp.Inst != Instruction.BitwiseAnd)
+ {
+ continue;
+ }
+
+ Operand lhs = andOp.GetSource(0);
+ Operand rhs = andOp.GetSource(1);
+
+ int addressIndex;
+
+ if (lhs == address && rhs.Type == OperandType.Constant && rhs.Value == 3)
+ {
+ addressIndex = 0;
+ }
+ else if (rhs == address && lhs.Type == OperandType.Constant && lhs.Value == 3)
+ {
+ addressIndex = 1;
+ }
+ else
+ {
+ continue;
+ }
+
+ LinkedListNode<INode> useNode = list.Find(andOp);
+
+ // The offset calculation goes right before the use, so the use must be on this list (same block).
+ if (useNode == null)
+ {
+ continue;
+ }
+
+ Operand adjusted = Local();
+ Operation addOp = new Operation(Instruction.Add, adjusted, byteOffset, Const(constantOffset));
+ list.AddBefore(useNode, addOp);
+
+ andOp.SetSource(addressIndex, adjusted);
+ }
+ }
+
+ private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int baseAddressCbOffset)
+ {
+ // The address is the base address itself: offset is zero.
+ if (IsCb0Offset(address, baseAddressCbOffset))
+ {
+ return (Const(0), 0);
+ }
+
+ // Peel off a trailing "+ constant", if there is one.
+ var split = GetStorageConstantOffset(block, address);
+
+ int constantOffset = split.Item2;
+
+ address = Utils.FindLastOperation(split.Item1, block);
+
+ // Base address plus a constant: only the constant part remains.
+ if (IsCb0Offset(address, baseAddressCbOffset))
+ {
+ return (Const(0), constantOffset);
+ }
+
+ // Otherwise the address must come from "offset + base" (in either order).
+ if (!(address.AsgOp is Operation addOp) || addOp.Inst != Instruction.Add)
+ {
+ return (null, 0);
+ }
+
+ Operand lhs = addOp.GetSource(0);
+ Operand rhs = Utils.FindLastOperation(addOp.GetSource(1), block);
+
+ if (IsCb0Offset(rhs, baseAddressCbOffset))
+ {
+ return (lhs, constantOffset);
+ }
+
+ return IsCb0Offset(lhs, baseAddressCbOffset) ? (rhs, constantOffset) : (null, 0);
+ }
+
+ private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
+ {
+ // Matches "x + constant" and splits it into (x, constant); anything else is returned untouched with offset 0.
+ if (address.AsgOp is Operation addOp && addOp.Inst == Instruction.Add)
+ {
+ Operand variablePart = addOp.GetSource(0);
+ Operand constantPart = addOp.GetSource(1);
+
+ if (constantPart.Type == OperandType.Constant)
+ {
+ return (variablePart, constantPart.Value);
+ }
+ }
+
+ return (address, 0);
+ }
+
private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
{
Operation operation = (Operation)node.Value;
@@ -165,7 +297,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
Operand byteOffset = Local();
Operand wordOffset = Local();
- Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
+ Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
node.List.AddBefore(node, subOp);
@@ -260,7 +392,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
if (operand.Type == OperandType.ConstantBuffer)
{
- int slot = operand.GetCbufSlot();
+ int slot = operand.GetCbufSlot();
int offset = operand.GetCbufOffset();
if (slot == 0 && offset >= sbStart && offset < sbEnd)