path: root/src/Ryujinx.Graphics.Gpu
diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu')
26 files changed, 2202 insertions, 226 deletions
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
index 6ba1bc22..180e2a6b 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
@@ -13,7 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <summary>
/// Represents a GPU General Purpose FIFO command processor.
/// </summary>
- class GPFifoProcessor
+ class GPFifoProcessor : IDisposable
private const int MacrosCount = 0x80;
private const int MacroIndexMask = MacrosCount - 1;
@@ -327,5 +327,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
+        /// <summary>
+        /// Releases the resources held by the processor.
+        /// </summary>
+        /// <param name="disposing">True when called from <see cref="Dispose()"/>; the 3D engine class is only disposed in that case</param>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!disposing)
+            {
+                return;
+            }
+            _3dClass.Dispose();
+        }
+        /// <summary>
+        /// Disposes the processor and suppresses finalization of this instance.
+        /// </summary>
+        public void Dispose()
+        {
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VertexInfoBufferUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VertexInfoBufferUpdater.cs
new file mode 100644
index 00000000..65f556fc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VertexInfoBufferUpdater.cs
@@ -0,0 +1,141 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Shader;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
+    /// <summary>
+    /// Vertex info buffer data updater.
+    /// Keeps a local copy of the buffer contents and marks the modified regions as dirty.
+    /// </summary>
+    class VertexInfoBufferUpdater : BufferUpdater
+    {
+        private VertexInfoBuffer _data;
+        /// <summary>
+        /// Creates a new vertex info buffer updater.
+        /// </summary>
+        /// <param name="renderer">Renderer that the vertex info buffer will be used with</param>
+        public VertexInfoBufferUpdater(IRenderer renderer) : base(renderer)
+        {
+        }
+        /// <summary>
+        /// Sets vertex data related counts.
+        /// Only the values that actually changed are marked as dirty.
+        /// </summary>
+        /// <param name="vertexCount">Number of vertices used on the draw</param>
+        /// <param name="instanceCount">Number of draw instances</param>
+        /// <param name="firstVertex">Index of the first vertex on the vertex buffer</param>
+        /// <param name="firstInstance">Index of the first instanced vertex on the vertex buffer</param>
+        public void SetVertexCounts(int vertexCount, int instanceCount, int firstVertex, int firstInstance)
+        {
+            ref var counts = ref _data.VertexCounts;
+            if (vertexCount != counts.X)
+            {
+                counts.X = vertexCount;
+                MarkDirty(VertexInfoBuffer.VertexCountsOffset, sizeof(int));
+            }
+            if (instanceCount != counts.Y)
+            {
+                counts.Y = instanceCount;
+                MarkDirty(VertexInfoBuffer.VertexCountsOffset + sizeof(int), sizeof(int));
+            }
+            if (firstVertex != counts.Z)
+            {
+                counts.Z = firstVertex;
+                MarkDirty(VertexInfoBuffer.VertexCountsOffset + 2 * sizeof(int), sizeof(int));
+            }
+            if (firstInstance != counts.W)
+            {
+                counts.W = firstInstance;
+                MarkDirty(VertexInfoBuffer.VertexCountsOffset + 3 * sizeof(int), sizeof(int));
+            }
+        }
+        /// <summary>
+        /// Sets geometry data related counts.
+        /// </summary>
+        /// <param name="primitivesCount">Number of primitives consumed by the geometry shader</param>
+        public void SetGeometryCounts(int primitivesCount)
+        {
+            ref var counts = ref _data.GeometryCounts;
+            if (counts.X == primitivesCount)
+            {
+                return;
+            }
+            counts.X = primitivesCount;
+            MarkDirty(VertexInfoBuffer.GeometryCountsOffset, sizeof(int));
+        }
+        /// <summary>
+        /// Sets a vertex stride and related data.
+        /// The first element of the entry holds the stride; elements 1 to 3 are set to 1 when
+        /// their index is below <paramref name="componentCount"/>, and to 0 otherwise.
+        /// </summary>
+        /// <param name="index">Index of the vertex stride to be updated</param>
+        /// <param name="stride">Stride divided by the component or format size</param>
+        /// <param name="componentCount">Number of components that the format has</param>
+        public void SetVertexStride(int index, int stride, int componentCount)
+        {
+            ref var entry = ref _data.VertexStrides[index];
+            var entryOffset = VertexInfoBuffer.VertexStridesOffset + index * Unsafe.SizeOf<Vector4<int>>();
+            if (stride != entry.X)
+            {
+                entry.X = stride;
+                MarkDirty(entryOffset, sizeof(int));
+            }
+            for (int component = 1; component < 4; component++)
+            {
+                int enabled = 0;
+                if (component < componentCount)
+                {
+                    enabled = 1;
+                }
+                ref int stored = ref GetElementRef(ref entry, component);
+                if (stored == enabled)
+                {
+                    continue;
+                }
+                stored = enabled;
+                MarkDirty(entryOffset + component * sizeof(int), sizeof(int));
+            }
+        }
+        /// <summary>
+        /// Sets a vertex offset and related data.
+        /// </summary>
+        /// <param name="index">Index of the vertex offset to be updated</param>
+        /// <param name="offset">Offset divided by the component or format size</param>
+        /// <param name="divisor">If the draw is instanced, should have the vertex divisor value, otherwise should be zero</param>
+        public void SetVertexOffset(int index, int offset, int divisor)
+        {
+            ref var entry = ref _data.VertexOffsets[index];
+            var entryOffset = VertexInfoBuffer.VertexOffsetsOffset + index * Unsafe.SizeOf<Vector4<int>>();
+            if (offset != entry.X)
+            {
+                entry.X = offset;
+                MarkDirty(entryOffset, sizeof(int));
+            }
+            if (divisor != entry.Y)
+            {
+                entry.Y = divisor;
+                MarkDirty(entryOffset + sizeof(int), sizeof(int));
+            }
+        }
+        /// <summary>
+        /// Sets the offset of the index buffer.
+        /// The value is stored on the last element of the geometry counts vector.
+        /// </summary>
+        /// <param name="offset">Offset divided by the component size</param>
+        public void SetIndexBufferOffset(int offset)
+        {
+            ref var counts = ref _data.GeometryCounts;
+            if (counts.W == offset)
+            {
+                return;
+            }
+            counts.W = offset;
+            MarkDirty(VertexInfoBuffer.GeometryCountsOffset + 3 * sizeof(int), sizeof(int));
+        }
+        /// <summary>
+        /// Submits all pending buffer updates to the GPU.
+        /// </summary>
+        public void Commit()
+        {
+            // View the local copy of the buffer data as raw bytes, without copying it.
+            Span<VertexInfoBuffer> single = MemoryMarshal.CreateSpan(ref _data, 1);
+            Commit(MemoryMarshal.Cast<VertexInfoBuffer, byte>(single));
+        }
+    }
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsCompute.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsCompute.cs
new file mode 100644
index 00000000..cbbfd251
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsCompute.cs
@@ -0,0 +1,96 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Shader;
+using System;
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
+    /// <summary>
+    /// Draw manager that runs the vertex, tessellation and geometry stages as compute shaders.
+    /// </summary>
+    class VtgAsCompute : IDisposable
+    {
+        private readonly GpuContext _context;
+        private readonly GpuChannel _channel;
+        private readonly DeviceStateWithShadow<ThreedClassState> _state;
+        private readonly VtgAsComputeContext _vacContext;
+        /// <summary>
+        /// Creates a new vertex, tessellation and geometry as compute shader draw manager.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="state">3D engine state</param>
+        public VtgAsCompute(GpuContext context, GpuChannel channel, DeviceStateWithShadow<ThreedClassState> state)
+        {
+            _context = context;
+            _channel = channel;
+            _state = state;
+            _vacContext = new VtgAsComputeContext(context);
+        }
+        /// <summary>
+        /// Emulates the pre-rasterization stages of a draw operation using a compute shader.
+        /// Runs the vertex, geometry and fragment steps in that order, then releases the
+        /// buffer sub-regions that were allocated for the draw.
+        /// </summary>
+        /// <param name="engine">3D engine</param>
+        /// <param name="vertexAsCompute">Vertex shader converted to compute</param>
+        /// <param name="geometryAsCompute">Optional geometry shader converted to compute</param>
+        /// <param name="vertexPassthroughProgram">Fragment shader with a vertex passthrough shader to feed the compute output into the fragment stage</param>
+        /// <param name="topology">Primitive topology of the draw</param>
+        /// <param name="count">Index or vertex count of the draw</param>
+        /// <param name="instanceCount">Instance count</param>
+        /// <param name="firstIndex">First index on the index buffer, for indexed draws</param>
+        /// <param name="firstVertex">First vertex on the vertex buffer</param>
+        /// <param name="firstInstance">First instance</param>
+        /// <param name="indexed">Whether the draw is indexed</param>
+        public void DrawAsCompute(
+            ThreedClass engine,
+            ShaderAsCompute vertexAsCompute,
+            ShaderAsCompute geometryAsCompute,
+            IProgram vertexPassthroughProgram,
+            PrimitiveTopology topology,
+            int count,
+            int instanceCount,
+            int firstIndex,
+            int firstVertex,
+            int firstInstance,
+            bool indexed)
+        {
+            var drawState = new VtgAsComputeState(
+                _context,
+                _channel,
+                _state,
+                _vacContext,
+                engine,
+                vertexAsCompute,
+                geometryAsCompute,
+                vertexPassthroughProgram,
+                topology,
+                count,
+                instanceCount,
+                firstIndex,
+                firstVertex,
+                firstInstance,
+                indexed);
+            drawState.RunVertex();
+            drawState.RunGeometry();
+            drawState.RunFragment();
+            // Sub-regions are only needed for the duration of a single draw.
+            _vacContext.FreeBuffers();
+        }
+        /// <summary>
+        /// Releases the resources held by the draw manager.
+        /// </summary>
+        /// <param name="disposing">True when called from <see cref="Dispose()"/>; the compute context is only disposed in that case</param>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!disposing)
+            {
+                return;
+            }
+            _vacContext.Dispose();
+        }
+        /// <summary>
+        /// Disposes the draw manager and suppresses finalization of this instance.
+        /// </summary>
+        public void Dispose()
+        {
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+    }
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs
new file mode 100644
index 00000000..e9b754ff
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs
@@ -0,0 +1,648 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
+ /// <summary>
+ /// Vertex, tessellation and geometry as compute shader context.
+ /// </summary>
+ class VtgAsComputeContext : IDisposable
+ {
+ private const int DummyBufferSize = 16;
+ private readonly GpuContext _context;
+        /// <summary>
+        /// Cache of buffer textures used for vertex and index buffers, keyed by format.
+        /// </summary>
+        private class BufferTextureCache : IDisposable
+        {
+            private readonly Dictionary<Format, ITexture> _cache;
+            /// <summary>
+            /// Creates a new, empty buffer texture cache.
+            /// </summary>
+            public BufferTextureCache()
+            {
+                _cache = new Dictionary<Format, ITexture>();
+            }
+            /// <summary>
+            /// Gets the buffer texture cached for the specified format, creating and caching it first if needed.
+            /// </summary>
+            /// <param name="renderer">Renderer where the texture will be used</param>
+            /// <param name="format">Format of the buffer texture</param>
+            /// <returns>Buffer texture</returns>
+            public ITexture Get(IRenderer renderer, Format format)
+            {
+                if (_cache.TryGetValue(format, out ITexture cached))
+                {
+                    return cached;
+                }
+                var createInfo = new TextureCreateInfo(
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    1,
+                    format,
+                    DepthStencilMode.Depth,
+                    Target.TextureBuffer,
+                    SwizzleComponent.Red,
+                    SwizzleComponent.Green,
+                    SwizzleComponent.Blue,
+                    SwizzleComponent.Alpha);
+                ITexture created = renderer.CreateTexture(createInfo);
+                _cache.Add(format, created);
+                return created;
+            }
+            /// <summary>
+            /// Releases all cached textures and empties the cache.
+            /// </summary>
+            /// <param name="disposing">True when called from <see cref="Dispose()"/>; textures are only released in that case</param>
+            protected virtual void Dispose(bool disposing)
+            {
+                if (!disposing)
+                {
+                    return;
+                }
+                foreach (ITexture texture in _cache.Values)
+                {
+                    texture.Release();
+                }
+                _cache.Clear();
+            }
+            /// <summary>
+            /// Disposes the cache and suppresses finalization of this instance.
+            /// </summary>
+            public void Dispose()
+            {
+                Dispose(disposing: true);
+                GC.SuppressFinalize(this);
+            }
+        }
+ /// <summary>
+ /// Buffer state.
+ /// Tracks a renderer buffer together with how much of it is currently in use;
+ /// sub-regions are handed out by <see cref="EnsureBuffer"/>.
+ /// </summary>
+ private struct Buffer
+ {
+ /// <summary>
+ /// Buffer handle.
+ /// </summary>
+ public BufferHandle Handle;
+ /// <summary>
+ /// Current free buffer offset.
+ /// Advanced on each sub-region allocation and reset to zero by <see cref="FreeBuffers"/>.
+ /// </summary>
+ public int Offset;
+ /// <summary>
+ /// Total buffer size in bytes.
+ /// </summary>
+ public int Size;
+ }
+        /// <summary>
+        /// Immutable state of an index buffer.
+        /// </summary>
+        private readonly struct IndexBuffer
+        {
+            /// <summary>
+            /// Buffer handle.
+            /// </summary>
+            public BufferHandle Handle { get; }
+            /// <summary>
+            /// Index count.
+            /// </summary>
+            public int Count { get; }
+            /// <summary>
+            /// Size in bytes.
+            /// </summary>
+            public int Size { get; }
+            /// <summary>
+            /// Creates a new index buffer state.
+            /// </summary>
+            /// <param name="handle">Buffer handle</param>
+            /// <param name="count">Index count</param>
+            /// <param name="size">Size in bytes</param>
+            public IndexBuffer(BufferHandle handle, int count, int size)
+            {
+                Handle = handle;
+                Count = count;
+                Size = size;
+            }
+            /// <summary>
+            /// Creates a range covering the whole buffer, starting from its beginning.
+            /// </summary>
+            /// <returns>Range</returns>
+            public readonly BufferRange ToRange() => ToRange(Size);
+            /// <summary>
+            /// Creates a range starting from the beginning of the buffer, with the specified size.
+            /// </summary>
+            /// <param name="size">Size in bytes of the range</param>
+            /// <returns>Range</returns>
+            public readonly BufferRange ToRange(int size) => new BufferRange(Handle, 0, size);
+        }
+ private readonly BufferTextureCache[] _bufferTextures;
+ private BufferHandle _dummyBuffer;
+ private Buffer _vertexDataBuffer;
+ private Buffer _geometryVertexDataBuffer;
+ private Buffer _geometryIndexDataBuffer;
+ private BufferHandle _sequentialIndexBuffer;
+ private int _sequentialIndexBufferCount;
+ private readonly Dictionary<PrimitiveTopology, IndexBuffer> _topologyRemapBuffers;
+ /// <summary>
+ /// Vertex information buffer updater.
+ /// </summary>
+ public VertexInfoBufferUpdater VertexInfoBufferUpdater { get; }
+        /// <summary>
+        /// Creates a new instance of the vertex, tessellation and geometry as compute shader context.
+        /// </summary>
+        /// <param name="context">GPU context</param>
+        public VtgAsComputeContext(GpuContext context)
+        {
+            _context = context;
+            // The buffer texture caches themselves are created lazily, on first use of each slot.
+            int bufferTextureSlots = Constants.TotalVertexBuffers + 2;
+            _bufferTextures = new BufferTextureCache[bufferTextureSlots];
+            _topologyRemapBuffers = new Dictionary<PrimitiveTopology, IndexBuffer>();
+            VertexInfoBufferUpdater = new(context.Renderer);
+        }
+        /// <summary>
+        /// Gets the number of complete primitives that can be formed with a given vertex count, for a given topology.
+        /// Quad based topologies are counted in triangles, two per quad.
+        /// </summary>
+        /// <param name="primitiveType">Topology</param>
+        /// <param name="count">Vertex count</param>
+        /// <returns>Total of complete primitives</returns>
+        public static int GetPrimitivesCount(PrimitiveTopology primitiveType, int count)
+        {
+            switch (primitiveType)
+            {
+                case PrimitiveTopology.Lines:
+                    return count / 2;
+                case PrimitiveTopology.LinesAdjacency:
+                    return count / 4;
+                case PrimitiveTopology.LineLoop:
+                    return count > 1 ? count : 0;
+                case PrimitiveTopology.LineStrip:
+                    return Math.Max(count - 1, 0);
+                case PrimitiveTopology.LineStripAdjacency:
+                    return Math.Max(count - 3, 0);
+                case PrimitiveTopology.Triangles:
+                    return count / 3;
+                case PrimitiveTopology.TrianglesAdjacency:
+                    return count / 6;
+                case PrimitiveTopology.TriangleStrip:
+                case PrimitiveTopology.TriangleFan:
+                case PrimitiveTopology.Polygon:
+                    return Math.Max(count - 2, 0);
+                case PrimitiveTopology.TriangleStripAdjacency:
+                    return Math.Max(count - 2, 0) / 2;
+                case PrimitiveTopology.Quads:
+                    return (count / 4) * 2; // In triangles.
+                case PrimitiveTopology.QuadStrip:
+                    return Math.Max((count - 2) / 2, 0) * 2; // In triangles.
+                default:
+                    return count;
+            }
+        }
+        /// <summary>
+        /// Gets the total of vertices that a single primitive has, for the specified topology.
+        /// Quad based topologies report 3, as each quad is counted as two triangles.
+        /// </summary>
+        /// <param name="primitiveType">Topology</param>
+        /// <returns>Vertex count</returns>
+        private static int GetVerticesPerPrimitive(PrimitiveTopology primitiveType)
+        {
+            switch (primitiveType)
+            {
+                case PrimitiveTopology.Lines:
+                case PrimitiveTopology.LineLoop:
+                case PrimitiveTopology.LineStrip:
+                    return 2;
+                case PrimitiveTopology.LinesAdjacency:
+                case PrimitiveTopology.LineStripAdjacency:
+                    return 4;
+                case PrimitiveTopology.Triangles:
+                case PrimitiveTopology.TriangleStrip:
+                case PrimitiveTopology.TriangleFan:
+                case PrimitiveTopology.Polygon:
+                    return 3;
+                case PrimitiveTopology.TrianglesAdjacency:
+                case PrimitiveTopology.TriangleStripAdjacency:
+                    return 6;
+                case PrimitiveTopology.Quads:
+                case PrimitiveTopology.QuadStrip:
+                    return 3; // 2 triangles.
+                default:
+                    return 1;
+            }
+        }
+        /// <summary>
+        /// Gets a cached or creates a new buffer that can be used to map linear indices to ones
+        /// of a specified topology, and build complete primitives.
+        /// A cached buffer is reused when it was created for at least <paramref name="count"/> vertices;
+        /// otherwise it is deleted and replaced by a new one.
+        /// </summary>
+        /// <param name="topology">Topology</param>
+        /// <param name="count">Number of input vertices that needs to be mapped using that buffer</param>
+        /// <returns>Remap buffer range</returns>
+        public BufferRange GetOrCreateTopologyRemapBuffer(PrimitiveTopology topology, int count)
+        {
+            bool isCached = _topologyRemapBuffers.TryGetValue(topology, out IndexBuffer remapBuffer);
+            if (isCached && remapBuffer.Count >= count)
+            {
+                // Only expose the part of the cached buffer that this vertex count needs.
+                int indexCount = Math.Max(GetPrimitivesCount(topology, count) * GetVerticesPerPrimitive(topology), 1);
+                return remapBuffer.ToRange(indexCount * sizeof(uint));
+            }
+            if (remapBuffer.Handle != BufferHandle.Null)
+            {
+                _context.Renderer.DeleteBuffer(remapBuffer.Handle);
+            }
+            remapBuffer = CreateTopologyRemapBuffer(topology, count);
+            _topologyRemapBuffers[topology] = remapBuffer;
+            return remapBuffer.ToRange();
+        }
+        /// <summary>
+        /// Creates a new topology remap buffer.
+        /// The buffer holds, for each output primitive, the indices of the input vertices that form it.
+        /// </summary>
+        /// <param name="topology">Topology</param>
+        /// <param name="count">Maximum of vertices that will be accessed</param>
+        /// <returns>Index buffer state for the new remap buffer</returns>
+        private IndexBuffer CreateTopologyRemapBuffer(PrimitiveTopology topology, int count)
+        {
+            // Size can't be zero as creating zero sized buffers is invalid.
+            Span<int> data = new int[Math.Max(GetPrimitivesCount(topology, count) * GetVerticesPerPrimitive(topology), 1)];
+            switch (topology)
+            {
+                case PrimitiveTopology.Points:
+                case PrimitiveTopology.Lines:
+                case PrimitiveTopology.LinesAdjacency:
+                case PrimitiveTopology.Triangles:
+                case PrimitiveTopology.TrianglesAdjacency:
+                case PrimitiveTopology.Patches:
+                    for (int index = 0; index < data.Length; index++)
+                    {
+                        data[index] = index;
+                    }
+                    break;
+                case PrimitiveTopology.LineLoop:
+                    // Masking the length keeps the single element placeholder buffer (length 1) untouched,
+                    // while still covering every segment, including the last one, of a real buffer.
+                    for (int index = 0; index < (data.Length & ~1); index += 2)
+                    {
+                        data[index] = index >> 1;
+                        data[index + 1] = (index >> 1) + 1;
+                    }
+                    // The last segment closes the loop, so it must end on the first vertex.
+                    data[^1] = 0;
+                    break;
+                case PrimitiveTopology.LineStrip:
+                    // The bound must include the last segment; masking only guards the length 1 placeholder.
+                    for (int index = 0; index < (data.Length & ~1); index += 2)
+                    {
+                        data[index] = index >> 1;
+                        data[index + 1] = (index >> 1) + 1;
+                    }
+                    break;
+                case PrimitiveTopology.TriangleStrip:
+                    int tsTrianglesCount = data.Length / 3;
+                    int tsOutIndex = 3;
+                    if (tsTrianglesCount > 0)
+                    {
+                        data[0] = 0;
+                        data[1] = 1;
+                        data[2] = 2;
+                    }
+                    for (int tri = 1; tri < tsTrianglesCount; tri++)
+                    {
+                        int baseIndex = tri * 3;
+                        // Odd triangles swap their first two vertices.
+                        if ((tri & 1) != 0)
+                        {
+                            data[baseIndex] = tsOutIndex - 1;
+                            data[baseIndex + 1] = tsOutIndex - 2;
+                            data[baseIndex + 2] = tsOutIndex++;
+                        }
+                        else
+                        {
+                            data[baseIndex] = tsOutIndex - 2;
+                            data[baseIndex + 1] = tsOutIndex - 1;
+                            data[baseIndex + 2] = tsOutIndex++;
+                        }
+                    }
+                    break;
+                case PrimitiveTopology.TriangleFan:
+                case PrimitiveTopology.Polygon:
+                    int tfTrianglesCount = data.Length / 3;
+                    int tfOutIndex = 1;
+                    // Every triangle shares vertex 0.
+                    for (int index = 0; index < tfTrianglesCount * 3; index += 3)
+                    {
+                        data[index] = 0;
+                        data[index + 1] = tfOutIndex;
+                        data[index + 2] = ++tfOutIndex;
+                    }
+                    break;
+                case PrimitiveTopology.Quads:
+                    int qQuadsCount = data.Length / 6;
+                    // Each quad (4 input vertices) becomes two triangles (6 indices).
+                    for (int quad = 0; quad < qQuadsCount; quad++)
+                    {
+                        int index = quad * 6;
+                        int qIndex = quad * 4;
+                        data[index] = qIndex;
+                        data[index + 1] = qIndex + 1;
+                        data[index + 2] = qIndex + 2;
+                        data[index + 3] = qIndex;
+                        data[index + 4] = qIndex + 2;
+                        data[index + 5] = qIndex + 3;
+                    }
+                    break;
+                case PrimitiveTopology.QuadStrip:
+                    int qsQuadsCount = data.Length / 6;
+                    if (qsQuadsCount > 0)
+                    {
+                        data[0] = 0;
+                        data[1] = 1;
+                        data[2] = 2;
+                        data[3] = 0;
+                        data[4] = 2;
+                        data[5] = 3;
+                    }
+                    // Each following quad advances by 2 input vertices.
+                    for (int quad = 1; quad < qsQuadsCount; quad++)
+                    {
+                        int index = quad * 6;
+                        int qIndex = quad * 2;
+                        data[index] = qIndex + 1;
+                        data[index + 1] = qIndex;
+                        data[index + 2] = qIndex + 2;
+                        data[index + 3] = qIndex + 1;
+                        data[index + 4] = qIndex + 2;
+                        data[index + 5] = qIndex + 3;
+                    }
+                    break;
+                case PrimitiveTopology.LineStripAdjacency:
+                    // The bound must include the last primitive; masking only guards the length 1 placeholder.
+                    for (int index = 0; index < (data.Length & ~3); index += 4)
+                    {
+                        int lIndex = index >> 2;
+                        data[index] = lIndex;
+                        data[index + 1] = lIndex + 1;
+                        data[index + 2] = lIndex + 2;
+                        data[index + 3] = lIndex + 3;
+                    }
+                    break;
+                case PrimitiveTopology.TriangleStripAdjacency:
+                    int tsaTrianglesCount = data.Length / 6;
+                    int tsaOutIndex = 6;
+                    if (tsaTrianglesCount > 0)
+                    {
+                        data[0] = 0;
+                        data[1] = 1;
+                        data[2] = 2;
+                        data[3] = 3;
+                        data[4] = 4;
+                        data[5] = 5;
+                    }
+                    for (int tri = 1; tri < tsaTrianglesCount; tri++)
+                    {
+                        int baseIndex = tri * 6;
+                        if ((tri & 1) != 0)
+                        {
+                            data[baseIndex] = tsaOutIndex - 2;
+                            data[baseIndex + 1] = tsaOutIndex - 1;
+                            data[baseIndex + 2] = tsaOutIndex - 4;
+                            data[baseIndex + 3] = tsaOutIndex - 3;
+                            data[baseIndex + 4] = tsaOutIndex++;
+                            data[baseIndex + 5] = tsaOutIndex++;
+                        }
+                        else
+                        {
+                            data[baseIndex] = tsaOutIndex - 4;
+                            data[baseIndex + 1] = tsaOutIndex - 3;
+                            data[baseIndex + 2] = tsaOutIndex - 2;
+                            data[baseIndex + 3] = tsaOutIndex - 1;
+                            data[baseIndex + 4] = tsaOutIndex++;
+                            data[baseIndex + 5] = tsaOutIndex++;
+                        }
+                    }
+                    break;
+            }
+            ReadOnlySpan<byte> dataBytes = MemoryMarshal.Cast<int, byte>(data);
+            BufferHandle buffer = _context.Renderer.CreateBuffer(dataBytes.Length);
+            _context.Renderer.SetBufferData(buffer, 0, dataBytes);
+            // Count records the input vertex count, which is what cache lookups compare against.
+            return new IndexBuffer(buffer, count, dataBytes.Length);
+        }
+        /// <summary>
+        /// Gets a buffer texture with a given format, for the given index.
+        /// The cache for the index is created on first use.
+        /// </summary>
+        /// <param name="index">Index of the buffer texture</param>
+        /// <param name="format">Format of the buffer texture</param>
+        /// <returns>Buffer texture</returns>
+        public ITexture EnsureBufferTexture(int index, Format format)
+        {
+            BufferTextureCache cache = _bufferTextures[index];
+            if (cache == null)
+            {
+                cache = new BufferTextureCache();
+                _bufferTextures[index] = cache;
+            }
+            return cache.Get(_context.Renderer, format);
+        }
+        /// <summary>
+        /// Allocates a sub-region of the output vertex buffer, and gets its offset and size.
+        /// </summary>
+        /// <param name="size">Size in bytes that will be used</param>
+        /// <returns>Usable offset and size on the buffer</returns>
+        public (int, int) GetVertexDataBuffer(int size) => EnsureBuffer(ref _vertexDataBuffer, size);
+        /// <summary>
+        /// Allocates a sub-region of the output geometry shader vertex buffer, and gets its offset and size.
+        /// </summary>
+        /// <param name="size">Size in bytes that will be used</param>
+        /// <returns>Usable offset and size on the buffer</returns>
+        public (int, int) GetGeometryVertexDataBuffer(int size) => EnsureBuffer(ref _geometryVertexDataBuffer, size);
+        /// <summary>
+        /// Allocates a sub-region of the output geometry shader index buffer, and gets its offset and size.
+        /// </summary>
+        /// <param name="size">Size in bytes that will be used</param>
+        /// <returns>Usable offset and size on the buffer</returns>
+        public (int, int) GetGeometryIndexDataBuffer(int size) => EnsureBuffer(ref _geometryIndexDataBuffer, size);
+        /// <summary>
+        /// Builds a binding range over the output vertex buffer.
+        /// </summary>
+        /// <param name="offset">Offset of the range</param>
+        /// <param name="size">Size of the range in bytes</param>
+        /// <returns>Range</returns>
+        public BufferRange GetVertexDataBufferRange(int offset, int size) =>
+            new BufferRange(_vertexDataBuffer.Handle, offset, size);
+        /// <summary>
+        /// Builds a binding range over the output geometry shader vertex buffer.
+        /// </summary>
+        /// <param name="offset">Offset of the range</param>
+        /// <param name="size">Size of the range in bytes</param>
+        /// <returns>Range</returns>
+        public BufferRange GetGeometryVertexDataBufferRange(int offset, int size) =>
+            new BufferRange(_geometryVertexDataBuffer.Handle, offset, size);
+        /// <summary>
+        /// Builds a binding range over the output geometry shader index buffer.
+        /// </summary>
+        /// <param name="offset">Offset of the range</param>
+        /// <param name="size">Size of the range in bytes</param>
+        /// <returns>Range</returns>
+        public BufferRange GetGeometryIndexDataBufferRange(int offset, int size) =>
+            new BufferRange(_geometryIndexDataBuffer.Handle, offset, size);
+        /// <summary>
+        /// Gets the range for a dummy 16 bytes buffer, filled with zeros.
+        /// The buffer is created and cleared the first time it is requested.
+        /// </summary>
+        /// <returns>Dummy buffer range</returns>
+        public BufferRange GetDummyBufferRange()
+        {
+            if (_dummyBuffer == BufferHandle.Null)
+            {
+                var renderer = _context.Renderer;
+                _dummyBuffer = renderer.CreateBuffer(DummyBufferSize);
+                renderer.Pipeline.ClearBuffer(_dummyBuffer, 0, DummyBufferSize, 0);
+            }
+            return new BufferRange(_dummyBuffer, 0, DummyBufferSize);
+        }
+        /// <summary>
+        /// Gets a sequential index buffer, with ever incrementing index values starting at zero.
+        /// The current buffer is reused when it already holds enough indices, otherwise it is deleted and recreated.
+        /// </summary>
+        /// <param name="count">Minimum number of indices that the buffer should have</param>
+        /// <returns>Buffer handle</returns>
+        public BufferHandle GetSequentialIndexBuffer(int count)
+        {
+            if (count <= _sequentialIndexBufferCount)
+            {
+                return _sequentialIndexBuffer;
+            }
+            if (_sequentialIndexBuffer != BufferHandle.Null)
+            {
+                _context.Renderer.DeleteBuffer(_sequentialIndexBuffer);
+            }
+            _sequentialIndexBuffer = _context.Renderer.CreateBuffer(count * sizeof(uint));
+            _sequentialIndexBufferCount = count;
+            int[] indices = new int[count];
+            for (int i = 0; i < indices.Length; i++)
+            {
+                indices[i] = i;
+            }
+            _context.Renderer.SetBufferData(_sequentialIndexBuffer, 0, MemoryMarshal.Cast<int, byte>(indices.AsSpan()));
+            return _sequentialIndexBuffer;
+        }
+        /// <summary>
+        /// Ensure that a buffer exists, is large enough, and allocates a sub-region of the specified size inside the buffer.
+        /// When the buffer is too small, it is deleted and a new one is created with exactly the required size.
+        /// </summary>
+        /// <param name="buffer">Buffer state</param>
+        /// <param name="size">Required size in bytes</param>
+        /// <returns>Allocated offset and size</returns>
+        private (int, int) EnsureBuffer(ref Buffer buffer, int size)
+        {
+            int regionStart = buffer.Offset;
+            int regionEnd = regionStart + size;
+            if (regionEnd > buffer.Size)
+            {
+                if (buffer.Handle != BufferHandle.Null)
+                {
+                    _context.Renderer.DeleteBuffer(buffer.Handle);
+                }
+                buffer.Handle = _context.Renderer.CreateBuffer(regionEnd);
+                buffer.Size = regionEnd;
+            }
+            // The next sub-region starts at the storage buffer offset alignment boundary after this one.
+            buffer.Offset = BitUtils.AlignUp(regionEnd, _context.Capabilities.StorageBufferOffsetAlignment);
+            return (regionStart, size);
+        }
+        /// <summary>
+        /// Frees all buffer sub-regions that were previously allocated.
+        /// Only the allocation offsets are reset, the buffers themselves are kept for reuse.
+        /// </summary>
+        public void FreeBuffers()
+        {
+            _geometryIndexDataBuffer.Offset = 0;
+            _geometryVertexDataBuffer.Offset = 0;
+            _vertexDataBuffer.Offset = 0;
+        }
+        /// <summary>
+        /// Releases the buffer textures and buffers created by the context.
+        /// </summary>
+        /// <param name="disposing">True when called from <see cref="Dispose()"/>; resources are only released in that case</param>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!disposing)
+            {
+                return;
+            }
+            for (int slot = 0; slot < _bufferTextures.Length; slot++)
+            {
+                _bufferTextures[slot]?.Dispose();
+                _bufferTextures[slot] = null;
+            }
+            DestroyIfNotNull(ref _dummyBuffer);
+            DestroyIfNotNull(ref _vertexDataBuffer.Handle);
+            DestroyIfNotNull(ref _geometryVertexDataBuffer.Handle);
+            DestroyIfNotNull(ref _geometryIndexDataBuffer.Handle);
+            DestroyIfNotNull(ref _sequentialIndexBuffer);
+            foreach (IndexBuffer remapBuffer in _topologyRemapBuffers.Values)
+            {
+                _context.Renderer.DeleteBuffer(remapBuffer.Handle);
+            }
+            _topologyRemapBuffers.Clear();
+        }
+        /// <summary>
+        /// Deletes a buffer if the handle is valid (not null), then sets the handle to null.
+        /// Does nothing when the handle is already null.
+        /// </summary>
+        /// <param name="handle">Buffer handle</param>
+        private void DestroyIfNotNull(ref BufferHandle handle)
+        {
+            if (handle == BufferHandle.Null)
+            {
+                return;
+            }
+            _context.Renderer.DeleteBuffer(handle);
+            handle = BufferHandle.Null;
+        }
+ /// <summary>
+ /// Disposes the context and suppresses finalization of this instance.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs
new file mode 100644
index 00000000..59a92508
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs
@@ -0,0 +1,535 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Shader;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
+ /// <summary>
+ /// Vertex, tessellation and geometry as compute shader state.
+ /// </summary>
+ struct VtgAsComputeState
+ {
+ // Divisor applied to the vertex, primitive and instance counts when computing dispatch group counts
+ // (presumably the local size of the generated compute shaders - confirm against the shader translator).
+ private const int ComputeLocalSize = 32;
+ // Objects and draw parameters captured as-is by the constructor.
+ private readonly GpuContext _context;
+ private readonly GpuChannel _channel;
+ private readonly DeviceStateWithShadow<ThreedClassState> _state;
+ private readonly VtgAsComputeContext _vacContext;
+ private readonly ThreedClass _engine;
+ private readonly ShaderAsCompute _vertexAsCompute;
+ // Null when the draw has no geometry shader emulated on compute.
+ private readonly ShaderAsCompute _geometryAsCompute;
+ private readonly IProgram _vertexPassthroughProgram;
+ private readonly PrimitiveTopology _topology;
+ private readonly int _count;
+ private readonly int _instanceCount;
+ private readonly int _firstIndex;
+ private readonly int _firstVertex;
+ private readonly int _firstInstance;
+ private readonly bool _indexed;
+ // Offset and size of the vertex output region reserved on the vertex-as-compute context by the constructor.
+ private readonly int _vertexDataOffset;
+ private readonly int _vertexDataSize;
+ // Geometry output regions; only assigned by the constructor when a geometry shader is present.
+ private readonly int _geometryVertexDataOffset;
+ private readonly int _geometryVertexDataSize;
+ private readonly int _geometryIndexDataOffset;
+ private readonly int _geometryIndexDataSize;
+ // Number of 32-bit indices reserved for the geometry output; used as the index count of the final indexed draw.
+ private readonly int _geometryIndexDataCount;
+ /// <summary>
+ /// Captures the parameters of a draw and reserves the output buffer regions needed to run
+ /// its vertex stage (and geometry stage, when present) on compute.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="state">3D engine state</param>
+ /// <param name="vacContext">Vertex as compute context</param>
+ /// <param name="engine">3D engine</param>
+ /// <param name="vertexAsCompute">Vertex shader converted to compute</param>
+ /// <param name="geometryAsCompute">Optional geometry shader converted to compute, null if there is none</param>
+ /// <param name="vertexPassthroughProgram">Fragment shader with a vertex passthrough shader to feed the compute output into the fragment stage</param>
+ /// <param name="topology">Primitive topology of the draw</param>
+ /// <param name="count">Index or vertex count of the draw</param>
+ /// <param name="instanceCount">Instance count</param>
+ /// <param name="firstIndex">First index on the index buffer, for indexed draws</param>
+ /// <param name="firstVertex">First vertex on the vertex buffer</param>
+ /// <param name="firstInstance">First instance</param>
+ /// <param name="indexed">Whether the draw is indexed</param>
+ public VtgAsComputeState(
+     GpuContext context,
+     GpuChannel channel,
+     DeviceStateWithShadow<ThreedClassState> state,
+     VtgAsComputeContext vacContext,
+     ThreedClass engine,
+     ShaderAsCompute vertexAsCompute,
+     ShaderAsCompute geometryAsCompute,
+     IProgram vertexPassthroughProgram,
+     PrimitiveTopology topology,
+     int count,
+     int instanceCount,
+     int firstIndex,
+     int firstVertex,
+     int firstInstance,
+     bool indexed)
+ {
+     _context = context;
+     _channel = channel;
+     _state = state;
+     _vacContext = vacContext;
+     _engine = engine;
+     _vertexAsCompute = vertexAsCompute;
+     _geometryAsCompute = geometryAsCompute;
+     _vertexPassthroughProgram = vertexPassthroughProgram;
+     _topology = topology;
+     _count = count;
+     _instanceCount = instanceCount;
+     _firstIndex = firstIndex;
+     _firstVertex = firstVertex;
+     _firstInstance = firstInstance;
+     _indexed = indexed;
+     // Vertex output: per-invocation output size multiplied by the vertex and instance counts.
+     int vertexOutputBytes = vertexAsCompute.Reservations.OutputSizeInBytesPerInvocation * count * instanceCount;
+     (_vertexDataOffset, _vertexDataSize) = vacContext.GetVertexDataBuffer(vertexOutputBytes);
+     if (geometryAsCompute != null)
+     {
+         int inputPrimitives = VtgAsComputeContext.GetPrimitivesCount(topology, count * instanceCount);
+         var geometryInfo = geometryAsCompute.Info;
+         int stripsPerPrimitive = GetMaxCompleteStrips(geometryInfo.GeometryVerticesPerPrimitive, geometryInfo.GeometryMaxOutputVertices);
+         int outputVertices = inputPrimitives * geometryInfo.GeometryMaxOutputVertices * geometryInfo.ThreadsPerInputPrimitive;
+         int outputVertexBytes = outputVertices * geometryAsCompute.Reservations.OutputSizeInBytesPerInvocation;
+         // One index per output vertex plus one extra per possible complete strip
+         // (presumably for the primitive restart separators - confirm against the geometry compute shader).
+         int outputIndices = outputVertices + inputPrimitives * stripsPerPrimitive;
+         (_geometryVertexDataOffset, _geometryVertexDataSize) = vacContext.GetGeometryVertexDataBuffer(outputVertexBytes);
+         (_geometryIndexDataOffset, _geometryIndexDataSize) = vacContext.GetGeometryIndexDataBuffer(outputIndices * sizeof(uint));
+         _geometryIndexDataCount = outputIndices;
+     }
+ }
+ /// <summary>
+ /// Emulates the vertex stage using compute.
+ /// </summary>
+ /// <remarks>
+ /// Each vertex attribute is bound as a buffer texture (a dummy buffer is used when the attribute is constant,
+ /// or when its vertex buffer is disabled or unmapped), the index data is bound the same way,
+ /// the vertex info buffer is committed and the vertex compute program is dispatched.
+ /// </remarks>
+ public readonly void RunVertex()
+ {
+     _context.Renderer.Pipeline.SetProgram(_vertexAsCompute.HostProgram);
+     int primitivesCount = VtgAsComputeContext.GetPrimitivesCount(_topology, _count);
+     _vacContext.VertexInfoBufferUpdater.SetVertexCounts(_count, _instanceCount, _firstVertex, _firstInstance);
+     _vacContext.VertexInfoBufferUpdater.SetGeometryCounts(primitivesCount);
+     for (int index = 0; index < Constants.TotalVertexAttribs; index++)
+     {
+         var vertexAttrib = _state.State.VertexAttribState[index];
+         if (!FormatTable.TryGetSingleComponentAttribFormat(vertexAttrib.UnpackFormat(), out Format format, out int componentsCount))
+         {
+             Logger.Debug?.Print(LogClass.Gpu, $"Invalid attribute format 0x{vertexAttrib.UnpackFormat():X}.");
+             // Unknown format: fall back to four 32-bit components of the attribute's numeric type.
+             format = vertexAttrib.UnpackType() switch
+             {
+                 VertexAttribType.Sint => Format.R32Sint,
+                 VertexAttribType.Uint => Format.R32Uint,
+                 _ => Format.R32Float
+             };
+             componentsCount = 4;
+         }
+         if (vertexAttrib.UnpackIsConstant())
+         {
+             // Constant attributes do not read from a vertex buffer: zero stride and offset, dummy storage.
+             _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, 0, componentsCount);
+             _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, 0, 0);
+             SetDummyBufferTexture(_vertexAsCompute.Reservations, index, format);
+             continue;
+         }
+         int bufferIndex = vertexAttrib.UnpackBufferIndex();
+         GpuVa endAddress = _state.State.VertexBufferEndAddress[bufferIndex];
+         var vertexBuffer = _state.State.VertexBufferState[bufferIndex];
+         bool instanced = _state.State.VertexBufferInstanced[bufferIndex];
+         ulong address = vertexBuffer.Address.Pack();
+         if (!vertexBuffer.UnpackEnable() || !_channel.MemoryManager.IsMapped(address))
+         {
+             // Disabled or unmapped vertex buffer: same treatment as a constant attribute.
+             _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, 0, componentsCount);
+             _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, 0, 0);
+             SetDummyBufferTexture(_vertexAsCompute.Reservations, index, format);
+             continue;
+         }
+         int vbStride = vertexBuffer.UnpackStride();
+         ulong vbSize = GetVertexBufferSize(address, endAddress.Pack(), vbStride, _indexed, instanced, _firstVertex, _count);
+         ulong attributeOffset = (ulong)vertexAttrib.UnpackOffset();
+         int componentSize = format.GetScalarSize();
+         address += attributeOffset;
+         // The bound range starts at the attribute address rounded down to the texture buffer offset alignment;
+         // the remainder is passed to the shader below as an offset in components.
+         ulong misalign = address & ((ulong)_context.Capabilities.TextureBufferOffsetAlignment - 1);
+         vbSize = Align(vbSize - attributeOffset + misalign, componentSize);
+         SetBufferTexture(_vertexAsCompute.Reservations, index, format, address - misalign, vbSize);
+         // Stride and offset are expressed in components of the selected format rather than bytes.
+         _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, vbStride / componentSize, componentsCount);
+         _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, (int)misalign / componentSize, instanced ? vertexBuffer.Divisor : 0);
+     }
+     if (_indexed)
+     {
+         SetIndexBufferTexture(_vertexAsCompute.Reservations, _firstIndex, _count, out int ibOffset);
+         _vacContext.VertexInfoBufferUpdater.SetIndexBufferOffset(ibOffset);
+     }
+     else
+     {
+         // Non-indexed draws read their indices from a generated sequential index buffer.
+         SetSequentialIndexBufferTexture(_vertexAsCompute.Reservations, _count);
+         _vacContext.VertexInfoBufferUpdater.SetIndexBufferOffset(0);
+     }
+     int vertexInfoBinding = _vertexAsCompute.Reservations.VertexInfoConstantBufferBinding;
+     BufferRange vertexInfoRange = new(_vacContext.VertexInfoBufferUpdater.Handle, 0, VertexInfoBuffer.RequiredSize);
+     _context.Renderer.Pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(vertexInfoBinding, vertexInfoRange) });
+     int vertexDataBinding = _vertexAsCompute.Reservations.VertexOutputStorageBufferBinding;
+     BufferRange vertexDataRange = _vacContext.GetVertexDataBufferRange(_vertexDataOffset, _vertexDataSize);
+     _context.Renderer.Pipeline.SetStorageBuffers(stackalloc[] { new BufferAssignment(vertexDataBinding, vertexDataRange) });
+     _vacContext.VertexInfoBufferUpdater.Commit();
+     // Group counts cover the vertex count along X and the instance count along Y.
+     _context.Renderer.Pipeline.DispatchCompute(
+         BitUtils.DivRoundUp(_count, ComputeLocalSize),
+         BitUtils.DivRoundUp(_instanceCount, ComputeLocalSize),
+         1);
+ }
+ /// <summary>
+ /// Runs the geometry stage on compute. Returns immediately when the draw has no geometry shader.
+ /// </summary>
+ public readonly void RunGeometry()
+ {
+     if (_geometryAsCompute == null)
+     {
+         return;
+     }
+     int inputPrimitives = VtgAsComputeContext.GetPrimitivesCount(_topology, _count);
+     var infoUpdater = _vacContext.VertexInfoBufferUpdater;
+     infoUpdater.SetVertexCounts(_count, _instanceCount, _firstVertex, _firstInstance);
+     infoUpdater.SetGeometryCounts(inputPrimitives);
+     infoUpdater.Commit();
+     _context.Renderer.Pipeline.SetUniformBuffers(stackalloc[]
+     {
+         new BufferAssignment(
+             _vertexAsCompute.Reservations.VertexInfoConstantBufferBinding,
+             new BufferRange(_vacContext.VertexInfoBufferUpdater.Handle, 0, VertexInfoBuffer.RequiredSize)),
+     });
+     // Wait until compute is done.
+     // TODO: Batch compute and draw operations to avoid pipeline stalls.
+     _context.Renderer.Pipeline.Barrier();
+     _context.Renderer.Pipeline.SetProgram(_geometryAsCompute.HostProgram);
+     SetTopologyRemapBufferTexture(_geometryAsCompute.Reservations, _topology, _count);
+     // The vertex stage output is the geometry stage input; the two geometry regions receive its output.
+     BufferRange vertexInput = _vacContext.GetVertexDataBufferRange(_vertexDataOffset, _vertexDataSize);
+     BufferRange geometryVertexOutput = _vacContext.GetGeometryVertexDataBufferRange(_geometryVertexDataOffset, _geometryVertexDataSize);
+     BufferRange geometryIndexOutput = _vacContext.GetGeometryIndexDataBufferRange(_geometryIndexDataOffset, _geometryIndexDataSize);
+     _context.Renderer.Pipeline.SetStorageBuffers(stackalloc[]
+     {
+         new BufferAssignment(_vertexAsCompute.Reservations.VertexOutputStorageBufferBinding, vertexInput),
+         new BufferAssignment(_geometryAsCompute.Reservations.GeometryVertexOutputStorageBufferBinding, geometryVertexOutput),
+         new BufferAssignment(_geometryAsCompute.Reservations.GeometryIndexOutputStorageBufferBinding, geometryIndexOutput),
+     });
+     _context.Renderer.Pipeline.DispatchCompute(
+         BitUtils.DivRoundUp(inputPrimitives, ComputeLocalSize),
+         BitUtils.DivRoundUp(_instanceCount, ComputeLocalSize),
+         _geometryAsCompute.Info.ThreadsPerInputPrimitive);
+ }
+ /// <summary>
+ /// Draws using the data produced by the compute emulated vertex and geometry stages.
+ /// Nothing is drawn when rasterization is disabled and the draw is not needed for host transform feedback.
+ /// </summary>
+ public readonly void RunFragment()
+ {
+     bool tfEnabled = _state.State.TfEnable;
+     bool drawNeeded = _state.State.RasterizeEnable || (tfEnabled && _context.Capabilities.SupportsTransformFeedback);
+     if (!drawNeeded)
+     {
+         // With rasterizer discard enabled, and transform feedback either disabled or emulated,
+         // there is no reason to run the fragment stage.
+         // Note: The geometry shader might be skipped here, which is fine right now because VTG is only
+         // converted to compute when geometry shaders are not supported, or when the vertex pipeline does stores.
+         // A geometry shader without stores can be skipped when rasterizer discard is enabled,
+         // and one with stores would also have been converted to compute if stores are not supported.
+         return;
+     }
+     int outputBinding = _vertexAsCompute.Reservations.VertexOutputStorageBufferBinding;
+     _context.Renderer.Pipeline.Barrier();
+     _vacContext.VertexInfoBufferUpdater.SetVertexCounts(_count, _instanceCount, _firstVertex, _firstInstance);
+     _vacContext.VertexInfoBufferUpdater.Commit();
+     if (_geometryAsCompute == null)
+     {
+         // No geometry stage: draw straight from the vertex stage output.
+         BufferRange vertexOutput = _vacContext.GetVertexDataBufferRange(_vertexDataOffset, _vertexDataSize);
+         _context.Renderer.Pipeline.SetProgram(_vertexPassthroughProgram);
+         _context.Renderer.Pipeline.SetStorageBuffers(stackalloc[] { new BufferAssignment(outputBinding, vertexOutput) });
+         _context.Renderer.Pipeline.Draw(_count, _instanceCount, 0, 0);
+         return;
+     }
+     BufferRange geometryVertices = _vacContext.GetGeometryVertexDataBufferRange(_geometryVertexDataOffset, _geometryVertexDataSize);
+     BufferRange geometryIndices = _vacContext.GetGeometryIndexDataBufferRange(_geometryIndexDataOffset, _geometryIndexDataSize);
+     _context.Renderer.Pipeline.SetProgram(_vertexPassthroughProgram);
+     _context.Renderer.Pipeline.SetIndexBuffer(geometryIndices, IndexType.UInt);
+     _context.Renderer.Pipeline.SetStorageBuffers(stackalloc[] { new BufferAssignment(outputBinding, geometryVertices) });
+     _context.Renderer.Pipeline.SetPrimitiveRestart(true, -1);
+     _context.Renderer.Pipeline.SetPrimitiveTopology(GetGeometryOutputTopology(_geometryAsCompute.Info.GeometryVerticesPerPrimitive));
+     _context.Renderer.Pipeline.DrawIndexed(_geometryIndexDataCount, 1, 0, 0, 0);
+     // Index buffer and primitive restart were overridden above, so that engine state is flagged as dirty.
+     _engine.ForceStateDirtyByIndex(StateUpdater.IndexBufferStateIndex);
+     _engine.ForceStateDirtyByIndex(StateUpdater.PrimitiveRestartStateIndex);
+ }
+ /// <summary>
+ /// Maps a vertices per primitive count to the strip topology used to draw the geometry stage output.
+ /// </summary>
+ /// <param name="verticesPerPrimitive">Vertices per primitive count</param>
+ /// <returns>Triangle strip for 3, line strip for 2, points for any other value</returns>
+ private static PrimitiveTopology GetGeometryOutputTopology(int verticesPerPrimitive)
+ {
+     switch (verticesPerPrimitive)
+     {
+         case 3:
+             return PrimitiveTopology.TriangleStrip;
+         case 2:
+             return PrimitiveTopology.LineStrip;
+         default:
+             return PrimitiveTopology.Points;
+     }
+ }
+ /// <summary>
+ /// Computes how many complete primitive strips can be formed out of the maximum output vertex count.
+ /// </summary>
+ /// <param name="verticesPerPrimitive">Vertices per primitive count</param>
+ /// <param name="maxOutputVertices">Maximum geometry shader output vertices count</param>
+ /// <returns>Integer quotient of <paramref name="maxOutputVertices"/> by <paramref name="verticesPerPrimitive"/></returns>
+ private static int GetMaxCompleteStrips(int verticesPerPrimitive, int maxOutputVertices) => maxOutputVertices / verticesPerPrimitive;
+ /// <summary>
+ /// Binds the dummy buffer as the storage of a vertex buffer texture.
+ /// </summary>
+ /// <param name="reservations">Shader resource binding reservations</param>
+ /// <param name="index">Buffer texture index</param>
+ /// <param name="format">Buffer texture format</param>
+ private readonly void SetDummyBufferTexture(ResourceReservations reservations, int index, Format format)
+ {
+     // Buffer texture slots 0 and 1 are used for the index and topology remap buffers, vertex buffers start at 2.
+     ITexture dummyTexture = _vacContext.EnsureBufferTexture(index + 2, format);
+     BufferRange dummyRange = _vacContext.GetDummyBufferRange();
+     dummyTexture.SetStorage(dummyRange);
+     int binding = reservations.GetVertexBufferTextureBinding(index);
+     _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, binding, dummyTexture, null);
+ }
+ /// <summary>
+ /// Binds a range of guest memory holding vertex data as the storage of a vertex buffer texture.
+ /// </summary>
+ /// <param name="reservations">Shader resource binding reservations</param>
+ /// <param name="index">Buffer texture index</param>
+ /// <param name="format">Buffer texture format</param>
+ /// <param name="address">Address of the vertex buffer</param>
+ /// <param name="size">Size of the buffer in bytes</param>
+ private readonly void SetBufferTexture(ResourceReservations reservations, int index, Format format, ulong address, ulong size)
+ {
+     var memoryManager = _channel.MemoryManager;
+     ulong translatedAddress = memoryManager.Translate(address);
+     BufferRange vertexRange = memoryManager.Physical.BufferCache.GetBufferRange(translatedAddress, size);
+     // Buffer texture slots 0 and 1 are used for the index and topology remap buffers, vertex buffers start at 2.
+     ITexture vertexTexture = _vacContext.EnsureBufferTexture(index + 2, format);
+     vertexTexture.SetStorage(vertexRange);
+     int binding = reservations.GetVertexBufferTextureBinding(index);
+     _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, binding, vertexTexture, null);
+ }
+ /// <summary>
+ /// Binds the range of the guest index buffer used by the draw as the index buffer texture.
+ /// </summary>
+ /// <param name="reservations">Shader resource binding reservations</param>
+ /// <param name="firstIndex">First index of the index buffer</param>
+ /// <param name="count">Index count</param>
+ /// <param name="misalignedOffset">Offset, in indices, that should be added when accessing the buffer texture on the shader</param>
+ private readonly void SetIndexBufferTexture(ResourceReservations reservations, int firstIndex, int count, out int misalignedOffset)
+ {
+     ulong baseAddress = _state.State.IndexBufferState.Address.Pack();
+     // The shift is log2 of the index size in bytes; any type other than UShort and UInt is read as 8-bit.
+     (int shift, Format format) = _state.State.IndexBufferState.Type switch
+     {
+         IndexType.UShort => (1, Format.R16Uint),
+         IndexType.UInt => (2, Format.R32Uint),
+         _ => (0, Format.R8Uint),
+     };
+     ulong byteOffset = (ulong)firstIndex << shift;
+     ulong byteSize = (ulong)count << shift;
+     var memoryManager = _channel.MemoryManager;
+     ulong translatedAddress = memoryManager.Translate(baseAddress + byteOffset);
+     // The range start is rounded down to the texture buffer offset alignment and extended by the same amount.
+     ulong misalign = translatedAddress & ((ulong)_context.Capabilities.TextureBufferOffsetAlignment - 1);
+     BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(translatedAddress - misalign, byteSize + misalign);
+     misalignedOffset = (int)misalign >> shift;
+     SetIndexBufferTexture(reservations, range, format);
+ }
+ /// <summary>
+ /// Points the index buffer texture (buffer texture slot 0) at a host buffer range and binds it for compute.
+ /// </summary>
+ /// <param name="reservations">Shader resource binding reservations</param>
+ /// <param name="range">Index buffer range</param>
+ /// <param name="format">Index buffer format</param>
+ private readonly void SetIndexBufferTexture(ResourceReservations reservations, BufferRange range, Format format)
+ {
+     ITexture indexTexture = _vacContext.EnsureBufferTexture(0, format);
+     indexTexture.SetStorage(range);
+     int binding = reservations.IndexBufferTextureBinding;
+     _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, binding, indexTexture, null);
+ }
+ /// <summary>
+ /// Points the topology remap buffer texture (buffer texture slot 1) at the remap buffer
+ /// for the given topology and vertex count, and binds it for compute.
+ /// </summary>
+ /// <param name="reservations">Shader resource binding reservations</param>
+ /// <param name="topology">Input topology</param>
+ /// <param name="count">Input vertex count</param>
+ private readonly void SetTopologyRemapBufferTexture(ResourceReservations reservations, PrimitiveTopology topology, int count)
+ {
+     ITexture remapTexture = _vacContext.EnsureBufferTexture(1, Format.R32Uint);
+     var remapRange = _vacContext.GetOrCreateTopologyRemapBuffer(topology, count);
+     remapTexture.SetStorage(remapRange);
+     int binding = reservations.TopologyRemapBufferTextureBinding;
+     _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, binding, remapTexture, null);
+ }
+ /// <summary>
+ /// Points the index buffer texture (buffer texture slot 0) at a generated sequential index buffer and binds it for compute.
+ /// </summary>
+ /// <param name="reservations">Shader resource binding reservations</param>
+ /// <param name="count">Vertex count</param>
+ private readonly void SetSequentialIndexBufferTexture(ResourceReservations reservations, int count)
+ {
+     BufferHandle sequentialHandle = _vacContext.GetSequentialIndexBuffer(count);
+     ITexture indexTexture = _vacContext.EnsureBufferTexture(0, Format.R32Uint);
+     // One 32-bit index per vertex.
+     BufferRange sequentialRange = new(sequentialHandle, 0, count * sizeof(uint));
+     indexTexture.SetStorage(sequentialRange);
+     int binding = reservations.IndexBufferTextureBinding;
+     _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, binding, indexTexture, null);
+ }
+ /// <summary>
+ /// Gets the size of a vertex buffer based on the current 3D engine state.
+ /// </summary>
+ /// <param name="vbAddress">Vertex buffer address</param>
+ /// <param name="vbEndAddress">Vertex buffer end address (inclusive, the full size is computed as end - start + 1)</param>
+ /// <param name="vbStride">Vertex buffer stride</param>
+ /// <param name="indexed">Whether the draw is indexed</param>
+ /// <param name="instanced">Whether the draw is instanced</param>
+ /// <param name="firstVertex">First vertex index</param>
+ /// <param name="vertexCount">Vertex count</param>
+ /// <returns>Size of the vertex buffer, in bytes</returns>
+ private readonly ulong GetVertexBufferSize(ulong vbAddress, ulong vbEndAddress, int vbStride, bool indexed, bool instanced, int firstVertex, int vertexCount)
+ {
+     IndexType indexType = _state.State.IndexBufferState.Type;
+     bool indexTypeSmall = indexType == IndexType.UByte || indexType == IndexType.UShort;
+     ulong vbSize = vbEndAddress - vbAddress + 1;
+     ulong size;
+     if (indexed || vbStride == 0 || instanced)
+     {
+         // This size may be (much) larger than the real vertex buffer size.
+         // Avoid calculating it this way, unless we don't have any other option.
+         size = vbSize;
+         if (vbStride > 0 && indexTypeSmall && indexed && !instanced)
+         {
+             // If the index type is a small integer type, then we might be still able
+             // to reduce the vertex buffer size based on the maximum possible index value.
+             ulong maxVertexBufferSize = indexType == IndexType.UByte ? 0x100UL : 0x10000UL;
+             maxVertexBufferSize += _state.State.FirstVertex;
+             maxVertexBufferSize *= (uint)vbStride;
+             size = Math.Min(size, maxVertexBufferSize);
+         }
+     }
+     else
+     {
+         // For non-indexed draws, we can guess the size from the vertex count
+         // and stride.
+         int firstInstance = (int)_state.State.FirstInstance;
+         // Use 64-bit arithmetic: a 32-bit product can wrap around to a small positive value on large draws,
+         // which would yield a range smaller than what the draw reads. A negative result still becomes
+         // a huge unsigned value, so the full buffer size is selected in that case, as before.
+         long vertexRangeEnd = (long)firstInstance + firstVertex + vertexCount;
+         size = Math.Min(vbSize, (ulong)(vertexRangeEnd * vbStride));
+     }
+     return size;
+ }
+ /// <summary>
+ /// Rounds a size up to the next multiple of an alignment value.
+ /// </summary>
+ /// <param name="size">Size</param>
+ /// <param name="alignment">Alignment</param>
+ /// <returns>Aligned size</returns>
+ private static ulong Align(ulong size, int alignment)
+ {
+     ulong unit = (ulong)alignment;
+     return (size + unit - 1) / unit * unit;
+ }
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
index d7ee24b1..18e7ac00 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
@@ -1,4 +1,5 @@
using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Memory;
using System;
@@ -8,7 +9,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// <summary>
/// Draw manager.
/// </summary>
- class DrawManager
+ class DrawManager : IDisposable
// Since we don't know the index buffer size for indirect draws,
// we must assume a minimum and maximum size and use that for buffer data update purposes.
@@ -20,6 +21,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
private readonly DeviceStateWithShadow<ThreedClassState> _state;
private readonly DrawState _drawState;
private readonly SpecializationStateUpdater _currentSpecState;
+ private readonly VtgAsCompute _vtgAsCompute;
private bool _topologySet;
private bool _instancedDrawPending;
@@ -53,6 +55,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_state = state;
_drawState = drawState;
_currentSpecState = spec;
+ _vtgAsCompute = new(context, channel, state);
/// <summary>
@@ -127,7 +130,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
if (renderEnable == ConditionalRenderEnabled.False)
- PerformDeferredDraws();
+ PerformDeferredDraws(engine);
_drawState.DrawIndexed = false;
@@ -190,13 +193,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_channel.BufferManager.SetIndexBuffer(br, IndexType.UInt);
- _context.Renderer.Pipeline.DrawIndexed(inlineIndexCount, 1, firstIndex, firstVertex, firstInstance);
+ DrawImpl(engine, inlineIndexCount, 1, firstIndex, firstVertex, firstInstance, indexed: true);
else if (_drawState.DrawIndexed)
int firstVertex = (int)_state.State.FirstVertex;
- _context.Renderer.Pipeline.DrawIndexed(indexCount, 1, firstIndex, firstVertex, firstInstance);
+ DrawImpl(engine, indexCount, 1, firstIndex, firstVertex, firstInstance, indexed: true);
@@ -204,7 +207,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
var drawState = _state.State.VertexBufferDrawState;
#pragma warning restore IDE0059
- _context.Renderer.Pipeline.Draw(drawVertexCount, 1, drawFirstVertex, firstInstance);
+ DrawImpl(engine, drawVertexCount, 1, 0, drawFirstVertex, firstInstance, indexed: false);
_drawState.DrawIndexed = false;
@@ -219,24 +222,26 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// Starts draw.
/// This sets primitive type and instanced draw parameters.
/// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
/// <param name="argument">Method call argument</param>
- public void DrawBegin(int argument)
+ public void DrawBegin(ThreedClass engine, int argument)
bool incrementInstance = (argument & (1 << 26)) != 0;
bool resetInstance = (argument & (1 << 27)) == 0;
PrimitiveType type = (PrimitiveType)(argument & 0xffff);
- DrawBegin(incrementInstance, resetInstance, type);
+ DrawBegin(engine, incrementInstance, resetInstance, type);
/// <summary>
/// Starts draw.
/// This sets primitive type and instanced draw parameters.
/// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
/// <param name="incrementInstance">Indicates if the current instance should be incremented</param>
/// <param name="resetInstance">Indicates if the current instance should be set to zero</param>
/// <param name="primitiveType">Primitive type</param>
- private void DrawBegin(bool incrementInstance, bool resetInstance, PrimitiveType primitiveType)
+ private void DrawBegin(ThreedClass engine, bool incrementInstance, bool resetInstance, PrimitiveType primitiveType)
if (incrementInstance)
@@ -244,7 +249,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
else if (resetInstance)
- PerformDeferredDraws();
+ PerformDeferredDraws(engine);
_instanceIndex = 0;
@@ -364,7 +369,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// <param name="instanced">True to increment the current instance value, false otherwise</param>
private void DrawIndexBufferBeginEndInstance(ThreedClass engine, int argument, bool instanced)
- DrawBegin(instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf));
+ DrawBegin(engine, instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf));
int firstIndex = argument & 0xffff;
int indexCount = (argument >> 16) & 0xfff;
@@ -409,7 +414,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// <param name="instanced">True to increment the current instance value, false otherwise</param>
private void DrawVertexArrayBeginEndInstance(ThreedClass engine, int argument, bool instanced)
- DrawBegin(instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf));
+ DrawBegin(engine, instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf));
int firstVertex = argument & 0xffff;
int vertexCount = (argument >> 16) & 0xfff;
@@ -541,23 +546,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
- if (instanceCount > 1)
- {
- // Must be called after UpdateState as it assumes the shader state
- // has already been set, and that bindings have been updated already.
- _channel.BufferManager.SetInstancedDrawVertexCount(count);
- }
+ DrawImpl(engine, count, instanceCount, firstIndex, firstVertex, firstInstance, indexed);
if (indexed)
- _context.Renderer.Pipeline.DrawIndexed(count, instanceCount, firstIndex, firstVertex, firstInstance);
_state.State.FirstVertex = 0;
- else
- {
- _context.Renderer.Pipeline.Draw(count, instanceCount, firstVertex, firstInstance);
- }
_state.State.FirstInstance = 0;
@@ -570,6 +564,67 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// <summary>
+ /// Performs an indexed or non-indexed draw, either directly on the host pipeline,
+ /// or through the compute path when the vertex shader is emulated on compute.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="count">Index count for indexed draws, vertex count for non-indexed draws</param>
+ /// <param name="instanceCount">Instance count</param>
+ /// <param name="firstIndex">First index on the index buffer for indexed draws, ignored for non-indexed draws</param>
+ /// <param name="firstVertex">First vertex on the vertex buffer</param>
+ /// <param name="firstInstance">First instance</param>
+ /// <param name="indexed">True if the draw is indexed, false otherwise</param>
+ private void DrawImpl(
+     ThreedClass engine,
+     int count,
+     int instanceCount,
+     int firstIndex,
+     int firstVertex,
+     int firstInstance,
+     bool indexed)
+ {
+     if (instanceCount > 1)
+     {
+         _channel.BufferManager.SetInstancedDrawVertexCount(count);
+     }
+     if (_drawState.VertexAsCompute == null)
+     {
+         // Regular path: the host runs the vertex pipeline itself.
+         if (indexed)
+         {
+             _context.Renderer.Pipeline.DrawIndexed(count, instanceCount, firstIndex, firstVertex, firstInstance);
+         }
+         else
+         {
+             _context.Renderer.Pipeline.Draw(count, instanceCount, firstVertex, firstInstance);
+         }
+         return;
+     }
+     _vtgAsCompute.DrawAsCompute(
+         engine,
+         _drawState.VertexAsCompute,
+         _drawState.GeometryAsCompute,
+         _drawState.VertexPassthrough,
+         _drawState.Topology,
+         count,
+         instanceCount,
+         firstIndex,
+         firstVertex,
+         firstInstance,
+         indexed);
+     if (_drawState.GeometryAsCompute != null)
+     {
+         // Geometry draws change the topology, so it has to be set again before the next regular draw.
+         // Doing that on the callee would have been better, but doing it here
+         // avoids having to pass the draw manager instance.
+         ForceStateDirty();
+     }
+ }
+ /// <summary>
/// Performs an indirect draw, with parameters from a GPU buffer.
/// </summary>
/// <param name="engine">3D engine where this method is being called</param>
@@ -667,43 +722,42 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// Once we detect the last instanced draw, then we perform the host instanced draw,
/// with the accumulated instance count.
/// </summary>
- public void PerformDeferredDraws()
+ /// <param name="engine">3D engine where this method is being called</param>
+ public void PerformDeferredDraws(ThreedClass engine)
// Perform any pending instanced draw.
if (_instancedDrawPending)
_instancedDrawPending = false;
+ int instanceCount = _instanceIndex + 1;
+ int firstInstance = _instancedFirstInstance;
bool indexedInline = _instancedIndexedInline;
if (_instancedIndexed || indexedInline)
+ int indexCount = _instancedIndexCount;
if (indexedInline)
int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount(_context.Renderer);
BufferRange br = new(_drawState.IbStreamer.GetInlineIndexBuffer(), 0, inlineIndexCount * 4);
_channel.BufferManager.SetIndexBuffer(br, IndexType.UInt);
+ indexCount = inlineIndexCount;
- _channel.BufferManager.SetInstancedDrawVertexCount(_instancedIndexCount);
+ int firstIndex = _instancedFirstIndex;
+ int firstVertex = _instancedFirstVertex;
- _context.Renderer.Pipeline.DrawIndexed(
- _instancedIndexCount,
- _instanceIndex + 1,
- _instancedFirstIndex,
- _instancedFirstVertex,
- _instancedFirstInstance);
+ DrawImpl(engine, indexCount, instanceCount, firstIndex, firstVertex, firstInstance, indexed: true);
- _channel.BufferManager.SetInstancedDrawVertexCount(_instancedDrawStateCount);
+ int vertexCount = _instancedDrawStateCount;
+ int firstVertex = _instancedDrawStateFirst;
- _context.Renderer.Pipeline.Draw(
- _instancedDrawStateCount,
- _instanceIndex + 1,
- _instancedDrawStateFirst,
- _instancedFirstInstance);
+ DrawImpl(engine, vertexCount, instanceCount, 0, firstVertex, firstInstance, indexed: false);
@@ -866,5 +920,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
+ /// <summary>
+ /// Disposes the vertex, tessellation and geometry as compute helper owned by this draw manager.
+ /// </summary>
+ /// <param name="disposing">True to dispose the helper; when false this method does nothing</param>
+ protected virtual void Dispose(bool disposing)
+ {
+     if (!disposing)
+     {
+         return;
+     }
+     _vtgAsCompute.Dispose();
+ }
+ /// <summary>
+ /// Disposes this instance by calling <see cref="Dispose(bool)"/> with true, then suppresses finalization of the object.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs
index 12099aef..cb43b002 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs
@@ -1,4 +1,5 @@
using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Shader;
namespace Ryujinx.Graphics.Gpu.Engine.Threed
@@ -61,5 +62,20 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// Index buffer data streamer for inline index buffer updates, such as those used in legacy OpenGL.
/// </summary>
public IbStreamer IbStreamer = new();
+ /// <summary>
+ /// If the vertex shader is emulated on compute, this should be set to the compute program, otherwise it should be null.
+ /// </summary>
+ public ShaderAsCompute VertexAsCompute;
+ /// <summary>
+ /// If a geometry shader exists and is emulated on compute, this should be set to the compute program, otherwise it should be null.
+ /// </summary>
+ public ShaderAsCompute GeometryAsCompute;
+ /// <summary>
+ /// If the vertex shader is emulated on compute, this should be set to the passthrough vertex program, otherwise it should be null.
+ /// </summary>
+ public IProgram VertexPassthrough;
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs
index e0607fbf..4fbbee3b 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs
@@ -218,11 +218,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
bool changed = false;
ref Array32<AttributeType> attributeTypes = ref _graphics.AttributeTypes;
- bool supportsScaledFormats = _context.Capabilities.SupportsScaledVertexFormats;
+ bool mayConvertVtgToCompute = ShaderCache.MayConvertVtgToCompute(ref _context.Capabilities);
+ bool supportsScaledFormats = _context.Capabilities.SupportsScaledVertexFormats && !mayConvertVtgToCompute;
for (int location = 0; location < state.Length; location++)
VertexAttribType type = state[location].UnpackType();
+ VertexAttribSize size = state[location].UnpackSize();
AttributeType value;
@@ -247,6 +249,18 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
+ if (mayConvertVtgToCompute && (size == VertexAttribSize.Rgb10A2 || size == VertexAttribSize.Rg11B10))
+ {
+ value |= AttributeType.Packed;
+ if (type == VertexAttribType.Snorm ||
+ type == VertexAttribType.Sint ||
+ type == VertexAttribType.Sscaled)
+ {
+ value |= AttributeType.PackedRgb10A2Signed;
+ }
+ }
if (attributeTypes[location] != value)
attributeTypes[location] = value;
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
index 1f919d9b..48a497b5 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
@@ -20,6 +20,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
public const int RasterizerStateIndex = 15;
public const int ScissorStateIndex = 16;
public const int VertexBufferStateIndex = 0;
+ public const int IndexBufferStateIndex = 23;
public const int PrimitiveRestartStateIndex = 12;
public const int RenderTargetStateIndex = 27;
@@ -290,7 +291,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
// of the shader for the new state.
if (_shaderSpecState != null && _currentSpecState.HasChanged())
- if (!_shaderSpecState.MatchesGraphics(_channel, ref _currentSpecState.GetPoolState(), ref _currentSpecState.GetGraphicsState(), _vsUsesDrawParameters, false))
+ if (!_shaderSpecState.MatchesGraphics(
+ _channel,
+ ref _currentSpecState.GetPoolState(),
+ ref _currentSpecState.GetGraphicsState(),
+ _drawState.VertexAsCompute != null,
+ _vsUsesDrawParameters,
+ checkTextures: false))
// Shader must be reloaded. _vtgWritesRtLayer should not change.
@@ -1453,6 +1460,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_fsReadsFragCoord = false;
+ if (gs.VertexAsCompute != null)
+ {
+ _drawState.VertexAsCompute = gs.VertexAsCompute;
+ _drawState.GeometryAsCompute = gs.GeometryAsCompute;
+ _drawState.VertexPassthrough = gs.HostProgram;
+ }
+ else
+ {
+ _drawState.VertexAsCompute = null;
+ _drawState.GeometryAsCompute = null;
+ _drawState.VertexPassthrough = null;
+ }
@@ -1540,5 +1560,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
+ /// <summary>
+ /// Forces a register group to be treated as dirty, by index.
+ /// </summary>
+ /// <param name="groupIndex">Index of the group to be dirtied</param>
+ public void ForceDirty(int groupIndex)
+ {
+ _updateTracker.ForceDirty(groupIndex); // forwarded unchanged to the update tracker
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
index 1f662890..7bc2970f 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
@@ -13,7 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// <summary>
/// Represents a 3D engine class.
/// </summary>
- class ThreedClass : IDeviceState
+ class ThreedClass : IDeviceState, IDisposable
private readonly GpuContext _context;
private readonly GPFifoClass _fifoClass;
@@ -179,6 +179,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// <summary>
+ /// Marks the register group with the specified index as dirty, forcing the associated state to update on the next draw.
+ /// </summary>
+ /// <param name="groupIndex">Index of the group to dirty</param>
+ public void ForceStateDirtyByIndex(int groupIndex)
+ {
+ _stateUpdater.ForceDirty(groupIndex); // forwarded unchanged to the state updater
+ }
+ /// <summary>
/// Forces the shaders to be rebound on the next draw.
/// </summary>
public void ForceShaderUpdate()
@@ -207,7 +216,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// </summary>
public void PerformDeferredDraws()
- _drawManager.PerformDeferredDraws();
+ _drawManager.PerformDeferredDraws(this);
/// <summary>
@@ -402,7 +411,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// <param name="argument">Method call argument</param>
private void DrawBegin(int argument)
- _drawManager.DrawBegin(argument);
+ _drawManager.DrawBegin(this, argument);
/// <summary>
@@ -617,5 +626,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_drawManager.Clear(this, argument, layerCount);
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing) // Dispose() passes true; the owned object is only released on that path
+ {
+ _drawManager.Dispose(); // release the object held in the _drawManager field
+ }
+ }
+ public void Dispose()
+ {
+ Dispose(true); // run the disposing path of Dispose(bool)
+ GC.SuppressFinalize(this); // ask the GC not to call a finalizer for this object
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/GpuChannel.cs b/src/Ryujinx.Graphics.Gpu/GpuChannel.cs
index 8fe64381..d70c9645 100644
--- a/src/Ryujinx.Graphics.Gpu/GpuChannel.cs
+++ b/src/Ryujinx.Graphics.Gpu/GpuChannel.cs
@@ -135,6 +135,7 @@ namespace Ryujinx.Graphics.Gpu
/// </summary>
private void Destroy()
+ _processor.Dispose();
var oldMemoryManager = Interlocked.Exchange(ref _memoryManager, null);
diff --git a/src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs b/src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs
index fcc6b8cf..1b517e63 100644
--- a/src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs
+++ b/src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs
@@ -557,6 +557,91 @@ namespace Ryujinx.Graphics.Gpu.Image
#pragma warning restore IDE0055
+ // Note: Some of these formats have been changed and require conversion in the shader,
+ // as GPUs don't support them when used as a buffer texture format.
+ private static readonly Dictionary<VertexAttributeFormat, (Format, int)> _singleComponentAttribFormats = new()
+ {
+ { VertexAttributeFormat.R8Unorm, (Format.R8Unorm, 1) },
+ { VertexAttributeFormat.R8Snorm, (Format.R8Snorm, 1) },
+ { VertexAttributeFormat.R8Uint, (Format.R8Uint, 1) },
+ { VertexAttributeFormat.R8Sint, (Format.R8Sint, 1) },
+ { VertexAttributeFormat.R16Float, (Format.R16Float, 1) },
+ { VertexAttributeFormat.R16Unorm, (Format.R16Unorm, 1) },
+ { VertexAttributeFormat.R16Snorm, (Format.R16Snorm, 1) },
+ { VertexAttributeFormat.R16Uint, (Format.R16Uint, 1) },
+ { VertexAttributeFormat.R16Sint, (Format.R16Sint, 1) },
+ { VertexAttributeFormat.R32Float, (Format.R32Float, 1) },
+ { VertexAttributeFormat.R32Uint, (Format.R32Uint, 1) },
+ { VertexAttributeFormat.R32Sint, (Format.R32Sint, 1) },
+ { VertexAttributeFormat.R8G8Unorm, (Format.R8Unorm, 2) },
+ { VertexAttributeFormat.R8G8Snorm, (Format.R8Snorm, 2) },
+ { VertexAttributeFormat.R8G8Uint, (Format.R8Uint, 2) },
+ { VertexAttributeFormat.R8G8Sint, (Format.R8Sint, 2) },
+ { VertexAttributeFormat.R16G16Float, (Format.R16Float, 2) },
+ { VertexAttributeFormat.R16G16Unorm, (Format.R16Unorm, 2) },
+ { VertexAttributeFormat.R16G16Snorm, (Format.R16Snorm, 2) },
+ { VertexAttributeFormat.R16G16Uint, (Format.R16Uint, 2) },
+ { VertexAttributeFormat.R16G16Sint, (Format.R16Sint, 2) },
+ { VertexAttributeFormat.R32G32Float, (Format.R32Float, 2) },
+ { VertexAttributeFormat.R32G32Uint, (Format.R32Uint, 2) },
+ { VertexAttributeFormat.R32G32Sint, (Format.R32Sint, 2) },
+ { VertexAttributeFormat.R8G8B8Unorm, (Format.R8Unorm, 3) },
+ { VertexAttributeFormat.R8G8B8Snorm, (Format.R8Snorm, 3) },
+ { VertexAttributeFormat.R8G8B8Uint, (Format.R8Uint, 3) },
+ { VertexAttributeFormat.R8G8B8Sint, (Format.R8Sint, 3) },
+ { VertexAttributeFormat.R16G16B16Float, (Format.R16Float, 3) },
+ { VertexAttributeFormat.R16G16B16Unorm, (Format.R16Unorm, 3) },
+ { VertexAttributeFormat.R16G16B16Snorm, (Format.R16Snorm, 3) },
+ { VertexAttributeFormat.R16G16B16Uint, (Format.R16Uint, 3) },
+ { VertexAttributeFormat.R16G16B16Sint, (Format.R16Sint, 3) },
+ { VertexAttributeFormat.R32G32B32Float, (Format.R32Float, 3) },
+ { VertexAttributeFormat.R32G32B32Uint, (Format.R32Uint, 3) },
+ { VertexAttributeFormat.R32G32B32Sint, (Format.R32Sint, 3) },
+ { VertexAttributeFormat.R8G8B8A8Unorm, (Format.R8Unorm, 4) },
+ { VertexAttributeFormat.R8G8B8A8Snorm, (Format.R8Snorm, 4) },
+ { VertexAttributeFormat.R8G8B8A8Uint, (Format.R8Uint, 4) },
+ { VertexAttributeFormat.R8G8B8A8Sint, (Format.R8Sint, 4) },
+ { VertexAttributeFormat.R16G16B16A16Float, (Format.R16Float, 4) },
+ { VertexAttributeFormat.R16G16B16A16Unorm, (Format.R16Unorm, 4) },
+ { VertexAttributeFormat.R16G16B16A16Snorm, (Format.R16Snorm, 4) },
+ { VertexAttributeFormat.R16G16B16A16Uint, (Format.R16Uint, 4) },
+ { VertexAttributeFormat.R16G16B16A16Sint, (Format.R16Sint, 4) },
+ { VertexAttributeFormat.R32G32B32A32Float, (Format.R32Float, 4) },
+ { VertexAttributeFormat.R32G32B32A32Uint, (Format.R32Uint, 4) },
+ { VertexAttributeFormat.R32G32B32A32Sint, (Format.R32Sint, 4) },
+ { VertexAttributeFormat.A2B10G10R10Unorm, (Format.R10G10B10A2Unorm, 4) },
+ { VertexAttributeFormat.A2B10G10R10Uint, (Format.R10G10B10A2Uint, 4) },
+ { VertexAttributeFormat.B10G11R11Float, (Format.R11G11B10Float, 3) },
+ { VertexAttributeFormat.R8Uscaled, (Format.R8Uint, 1) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R8Sscaled, (Format.R8Sint, 1) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R16Uscaled, (Format.R16Uint, 1) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R16Sscaled, (Format.R16Sint, 1) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R32Uscaled, (Format.R32Uint, 1) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R32Sscaled, (Format.R32Sint, 1) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R8G8Uscaled, (Format.R8Uint, 2) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R8G8Sscaled, (Format.R8Sint, 2) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R16G16Uscaled, (Format.R16Uint, 2) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R16G16Sscaled, (Format.R16Sint, 2) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R32G32Uscaled, (Format.R32Uint, 2) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R32G32Sscaled, (Format.R32Sint, 2) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R8G8B8Uscaled, (Format.R8Uint, 3) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R8G8B8Sscaled, (Format.R8Sint, 3) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R16G16B16Uscaled, (Format.R16Uint, 3) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R16G16B16Sscaled, (Format.R16Sint, 3) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R32G32B32Uscaled, (Format.R32Uint, 3) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R32G32B32Sscaled, (Format.R32Sint , 3) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R8G8B8A8Uscaled, (Format.R8Uint, 4) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R8G8B8A8Sscaled, (Format.R8Sint, 4) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R16G16B16A16Uscaled, (Format.R16Uint, 4) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R16G16B16A16Sscaled, (Format.R16Sint, 4) }, // Sscaled -> Sint
+ { VertexAttributeFormat.R32G32B32A32Uscaled, (Format.R32Uint, 4) }, // Uscaled -> Uint
+ { VertexAttributeFormat.R32G32B32A32Sscaled, (Format.R32Sint, 4) }, // Sscaled -> Sint
+ { VertexAttributeFormat.A2B10G10R10Snorm, (Format.R10G10B10A2Uint, 4) }, // Snorm -> Uint
+ { VertexAttributeFormat.A2B10G10R10Sint, (Format.R10G10B10A2Uint, 4) }, // Sint -> Uint
+ { VertexAttributeFormat.A2B10G10R10Uscaled, (Format.R10G10B10A2Uint, 4) }, // Uscaled -> Uint
+ { VertexAttributeFormat.A2B10G10R10Sscaled, (Format.R10G10B10A2Sint, 4) } // Sscaled -> Sint
+ };
/// <summary>
/// Try getting the texture format from an encoded format integer from the Maxwell texture descriptor.
/// </summary>
@@ -581,5 +666,22 @@ namespace Ryujinx.Graphics.Gpu.Image
return _attribFormats.TryGetValue((VertexAttributeFormat)encoded, out format);
+ /// <summary>
+ /// Tries to get a single component vertex attribute format, and its component count, from an encoded format integer from Maxwell attribute registers.
+ /// </summary>
+ /// <param name="encoded">The encoded format integer from the attribute registers</param>
+ /// <param name="format">The output single component vertex attribute format</param>
+ /// <param name="componentsCount">Number of components that the format has</param>
+ /// <returns>True if the encoded value was found in the single component format table, false otherwise</returns>
+ public static bool TryGetSingleComponentAttribFormat(uint encoded, out Format format, out int componentsCount)
+ {
+ bool result = _singleComponentAttribFormats.TryGetValue((VertexAttributeFormat)encoded, out var tuple);
+ format = tuple.Item1; // default(Format) when the lookup failed
+ componentsCount = tuple.Item2; // 0 when the lookup failed
+ return result;
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
index bf4cb5d0..8e9b4b85 100644
--- a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
+++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
@@ -6,7 +6,6 @@ using Ryujinx.Graphics.Shader;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Memory
@@ -15,9 +14,6 @@ namespace Ryujinx.Graphics.Gpu.Memory
/// </summary>
class BufferManager
- private const int TfInfoVertexCountOffset = Constants.TotalTransformFeedbackBuffers * sizeof(int);
- private const int TfInfoBufferSize = TfInfoVertexCountOffset + sizeof(int);
private readonly GpuContext _context;
private readonly GpuChannel _channel;
@@ -104,9 +100,6 @@ namespace Ryujinx.Graphics.Gpu.Memory
private readonly BuffersPerStage[] _gpStorageBuffers;
private readonly BuffersPerStage[] _gpUniformBuffers;
- private BufferHandle _tfInfoBuffer;
- private readonly int[] _tfInfoData;
private bool _gpStorageBuffersDirty;
private bool _gpUniformBuffersDirty;
@@ -146,11 +139,6 @@ namespace Ryujinx.Graphics.Gpu.Memory
_bufferTextures = new List<BufferTextureBinding>();
_ranges = new BufferAssignment[Constants.TotalGpUniformBuffers * Constants.ShaderStages];
- if (!context.Capabilities.SupportsTransformFeedback)
- {
- _tfInfoData = new int[Constants.TotalTransformFeedbackBuffers];
- }
@@ -339,13 +327,10 @@ namespace Ryujinx.Graphics.Gpu.Memory
/// <param name="vertexCount">Vertex count per instance</param>
public void SetInstancedDrawVertexCount(int vertexCount)
- if (!_context.Capabilities.SupportsTransformFeedback &&
- HasTransformFeedbackOutputs &&
- _tfInfoBuffer != BufferHandle.Null)
+ if (!_context.Capabilities.SupportsTransformFeedback && HasTransformFeedbackOutputs)
- Span<byte> data = stackalloc byte[sizeof(int)];
- MemoryMarshal.Cast<byte, int>(data)[0] = vertexCount;
- _context.Renderer.SetBufferData(_tfInfoBuffer, TfInfoVertexCountOffset, data);
+ _context.SupportBufferUpdater.SetTfeVertexCount(vertexCount);
+ _context.SupportBufferUpdater.Commit();
@@ -607,17 +592,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
else if (HasTransformFeedbackOutputs)
- Span<int> info = _tfInfoData.AsSpan();
- Span<BufferAssignment> buffers = stackalloc BufferAssignment[Constants.TotalTransformFeedbackBuffers + 1];
- bool needsDataUpdate = false;
- if (_tfInfoBuffer == BufferHandle.Null)
- {
- _tfInfoBuffer = _context.Renderer.CreateBuffer(TfInfoBufferSize, BufferAccess.Stream);
- }
- buffers[0] = new BufferAssignment(0, new BufferRange(_tfInfoBuffer, 0, TfInfoBufferSize));
+ Span<BufferAssignment> buffers = stackalloc BufferAssignment[Constants.TotalTransformFeedbackBuffers];
int alignment = _context.Capabilities.StorageBufferOffsetAlignment;
@@ -627,7 +602,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
if (tfb.Address == 0)
- buffers[1 + index] = new BufferAssignment(1 + index, BufferRange.Empty);
+ buffers[index] = new BufferAssignment(index, BufferRange.Empty);
@@ -637,22 +612,12 @@ namespace Ryujinx.Graphics.Gpu.Memory
int tfeOffset = ((int)tfb.Address & (alignment - 1)) / 4;
- if (info[index] != tfeOffset)
- {
- info[index] = tfeOffset;
- needsDataUpdate = true;
- }
+ _context.SupportBufferUpdater.SetTfeOffset(index, tfeOffset);
- buffers[1 + index] = new BufferAssignment(1 + index, bufferCache.GetBufferRange(address, size, write: true));
+ buffers[index] = new BufferAssignment(index, bufferCache.GetBufferRange(address, size, write: true));
- if (needsDataUpdate)
- {
- Span<byte> infoData = MemoryMarshal.Cast<int, byte>(info);
- _context.Renderer.SetBufferData(_tfInfoBuffer, 0, infoData);
- }
diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferUpdater.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferUpdater.cs
new file mode 100644
index 00000000..02090c04
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferUpdater.cs
@@ -0,0 +1,123 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using System;
+namespace Ryujinx.Graphics.Gpu.Memory
+ /// <summary>
+ /// Buffer data updater. Tracks the modified byte range of a buffer and uploads only that range on commit.
+ /// </summary>
+ class BufferUpdater : IDisposable
+ {
+ private BufferHandle _handle;
+ /// <summary>
+ /// Handle of the buffer. It is assigned by Commit when the buffer is first created, and reset by Dispose.
+ /// </summary>
+ public BufferHandle Handle => _handle;
+ private readonly IRenderer _renderer;
+ private int _startOffset = -1; // -1 means that there are no pending modifications
+ private int _endOffset = -1; // exclusive end of the pending modified range
+ /// <summary>
+ /// Creates a new instance of the buffer updater.
+ /// </summary>
+ /// <param name="renderer">Renderer that the buffer will be used with</param>
+ public BufferUpdater(IRenderer renderer)
+ {
+ _renderer = renderer;
+ }
+ /// <summary>
+ /// Mark a region of the buffer as modified and needing to be sent to the GPU on the next commit.
+ /// </summary>
+ /// <param name="startOffset">Start offset of the region in bytes</param>
+ /// <param name="byteSize">Size of the region in bytes</param>
+ protected void MarkDirty(int startOffset, int byteSize)
+ {
+ int endOffset = startOffset + byteSize;
+ if (_startOffset == -1) // nothing pending yet, the new region becomes the pending range
+ {
+ _startOffset = startOffset;
+ _endOffset = endOffset;
+ }
+ else // grow the pending range so that it also covers the new region
+ {
+ if (startOffset < _startOffset)
+ {
+ _startOffset = startOffset;
+ }
+ if (endOffset > _endOffset)
+ {
+ _endOffset = endOffset;
+ }
+ }
+ }
+ /// <summary>
+ /// Submits all pending buffer updates to the GPU. Does nothing if no region was marked as modified.
+ /// </summary>
+ /// <param name="data">All data that should be sent to the GPU. Only the modified regions will be updated</param>
+ /// <param name="binding">Optional uniform buffer binding to bind the buffer to if a new buffer was created, negative to skip binding</param>
+ protected void Commit(ReadOnlySpan<byte> data, int binding = -1)
+ {
+ if (_startOffset != -1)
+ {
+ if (_handle == BufferHandle.Null) // the buffer is created on the first commit that has pending data
+ {
+ _handle = _renderer.CreateBuffer(data.Length, BufferAccess.Stream);
+ _renderer.Pipeline.ClearBuffer(_handle, 0, data.Length, 0);
+ if (binding >= 0)
+ {
+ var range = new BufferRange(_handle, 0, data.Length);
+ _renderer.Pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(binding, range) }); // bind at the requested slot, not a fixed slot 0
+ }
+ }
+ _renderer.SetBufferData(_handle, _startOffset, data[_startOffset.._endOffset]); // upload only the modified range
+ _startOffset = -1; // reset to the "nothing pending" state
+ _endOffset = -1;
+ }
+ }
+ /// <summary>
+ /// Gets a reference to a given element of a vector.
+ /// </summary>
+ /// <param name="vector">Vector to get the element reference from</param>
+ /// <param name="elementIndex">Element index, from 0 (X) to 3 (W)</param>
+ /// <returns>Reference to the specified element</returns>
+ protected static ref T GetElementRef<T>(ref Vector4<T> vector, int elementIndex)
+ {
+ switch (elementIndex)
+ {
+ case 0:
+ return ref vector.X;
+ case 1:
+ return ref vector.Y;
+ case 2:
+ return ref vector.Z;
+ case 3:
+ return ref vector.W;
+ default: // any index outside of the 0 to 3 range is rejected
+ throw new ArgumentOutOfRangeException(nameof(elementIndex));
+ }
+ }
+ /// <summary>
+ /// Destroys the buffer, if one was created. Calling this more than once is safe.
+ /// </summary>
+ public void Dispose()
+ {
+ if (_handle != BufferHandle.Null)
+ {
+ _renderer.DeleteBuffer(_handle);
+ _handle = BufferHandle.Null; // prevents a second delete on repeated calls
+ }
+ }
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Memory/SupportBufferUpdater.cs b/src/Ryujinx.Graphics.Gpu/Memory/SupportBufferUpdater.cs
index c1e91c54..fb141db4 100644
--- a/src/Ryujinx.Graphics.Gpu/Memory/SupportBufferUpdater.cs
+++ b/src/Ryujinx.Graphics.Gpu/Memory/SupportBufferUpdater.cs
@@ -9,57 +9,22 @@ namespace Ryujinx.Graphics.Gpu.Memory
/// <summary>
/// Support buffer data updater.
/// </summary>
- class SupportBufferUpdater : IDisposable
+ class SupportBufferUpdater : BufferUpdater
private SupportBuffer _data;
- private BufferHandle _handle;
- private readonly IRenderer _renderer;
- private int _startOffset = -1;
- private int _endOffset = -1;
/// <summary>
/// Creates a new instance of the support buffer updater.
/// </summary>
/// <param name="renderer">Renderer that the support buffer will be used with</param>
- public SupportBufferUpdater(IRenderer renderer)
+ public SupportBufferUpdater(IRenderer renderer) : base(renderer)
- _renderer = renderer;
var defaultScale = new Vector4<float> { X = 1f, Y = 0f, Z = 0f, W = 0f };
DirtyRenderScale(0, SupportBuffer.RenderScaleMaxCount);
/// <summary>
- /// Mark a region of the support buffer as modified and needing to be sent to the GPU.
- /// </summary>
- /// <param name="startOffset">Start offset of the region in bytes</param>
- /// <param name="byteSize">Size of the region in bytes</param>
- private void MarkDirty(int startOffset, int byteSize)
- {
- int endOffset = startOffset + byteSize;
- if (_startOffset == -1)
- {
- _startOffset = startOffset;
- _endOffset = endOffset;
- }
- else
- {
- if (startOffset < _startOffset)
- {
- _startOffset = startOffset;
- }
- if (endOffset > _endOffset)
- {
- _endOffset = endOffset;
- }
- }
- }
- /// <summary>
/// Marks the fragment render scale count as being modified.
/// </summary>
private void DirtyFragmentRenderScaleCount()
@@ -220,40 +185,40 @@ namespace Ryujinx.Graphics.Gpu.Memory
/// <summary>
- /// Submits all pending buffer updates to the GPU.
+ /// Sets the offset of the misaligned portion of a transform feedback buffer, for transform feedback emulation.
/// </summary>
- public void Commit()
+ /// <param name="bufferIndex">Index of the transform feedback buffer</param>
+ /// <param name="offset">Misaligned offset of the buffer</param>
+ public void SetTfeOffset(int bufferIndex, int offset)
- if (_startOffset != -1)
- {
- if (_handle == BufferHandle.Null)
- {
- _handle = _renderer.CreateBuffer(SupportBuffer.RequiredSize, BufferAccess.Stream);
- _renderer.Pipeline.ClearBuffer(_handle, 0, SupportBuffer.RequiredSize, 0);
+ ref int currentOffset = ref GetElementRef(ref _data.TfeOffset, bufferIndex);
- var range = new BufferRange(_handle, 0, SupportBuffer.RequiredSize);
- _renderer.Pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, range) });
- }
- ReadOnlySpan<byte> data = MemoryMarshal.Cast<SupportBuffer, byte>(MemoryMarshal.CreateSpan(ref _data, 1));
- _renderer.SetBufferData(_handle, _startOffset, data[_startOffset.._endOffset]);
- _startOffset = -1;
- _endOffset = -1;
+ if (currentOffset != offset)
+ {
+ currentOffset = offset;
+ MarkDirty(SupportBuffer.TfeOffsetOffset + bufferIndex * sizeof(int), sizeof(int));
/// <summary>
- /// Destroys the support buffer.
+ /// Sets the vertex count used for transform feedback emulation with instanced draws.
/// </summary>
- public void Dispose()
+ /// <param name="vertexCount">Vertex count of the instanced draw</param>
+ public void SetTfeVertexCount(int vertexCount)
- if (_handle != BufferHandle.Null)
+ if (_data.TfeVertexCount.X != vertexCount)
- _renderer.DeleteBuffer(_handle);
- _handle = BufferHandle.Null;
+ _data.TfeVertexCount.X = vertexCount;
+ MarkDirty(SupportBuffer.TfeVertexCountOffset, sizeof(int));
+ /// <summary>
+ /// Submits all pending buffer updates to the GPU.
+ /// </summary>
+ public void Commit()
+ {
+ Commit(MemoryMarshal.Cast<SupportBuffer, byte>(MemoryMarshal.CreateSpan(ref _data, 1)), SupportBuffer.Binding);
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs
index ff9c39a1..600c8a98 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs
@@ -15,6 +15,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
public IProgram HostProgram { get; }
/// <summary>
+ /// Optional vertex shader converted to compute.
+ /// </summary>
+ public ShaderAsCompute VertexAsCompute { get; }
+ /// <summary>
+ /// Optional geometry shader converted to compute.
+ /// </summary>
+ public ShaderAsCompute GeometryAsCompute { get; }
+ /// <summary>
/// GPU state used to create this version of the shader.
/// </summary>
public ShaderSpecializationState SpecializationState { get; }
@@ -45,12 +55,25 @@ namespace Ryujinx.Graphics.Gpu.Shader
Bindings = new CachedShaderBindings(shaders.Length == 1, shaders);
+ public CachedShaderProgram(
+ IProgram hostProgram,
+ ShaderAsCompute vertexAsCompute,
+ ShaderAsCompute geometryAsCompute,
+ ShaderSpecializationState specializationState,
+ CachedShaderStage[] shaders) : this(hostProgram, specializationState, shaders) // common state is set by the three-argument constructor
+ {
+ VertexAsCompute = vertexAsCompute; // optional; Dispose() accesses it with a null-conditional
+ GeometryAsCompute = geometryAsCompute; // optional; Dispose() accesses it with a null-conditional
+ }
/// <summary>
/// Dispose of the host shader resources.
/// </summary>
public void Dispose()
+ VertexAsCompute?.HostProgram.Dispose();
+ GeometryAsCompute?.HostProgram.Dispose();
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
index 93d293f6..de6432bc 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs
@@ -35,7 +35,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
ShaderSpecializationState oldSpecState,
ShaderSpecializationState newSpecState,
ResourceCounts counts,
- int stageIndex) : base(context, counts, stageIndex, oldSpecState.TransformFeedbackDescriptors != null)
+ int stageIndex) : base(context, counts, stageIndex)
_data = data;
_cb1Data = cb1Data;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index 08096bd5..99ef89ed 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
- private const uint CodeGenVersion = 5609;
+ private const uint CodeGenVersion = 5551;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";
@@ -141,6 +141,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
public ShaderStage Stage;
/// <summary>
+ /// Number of vertices that each output primitive has on a geometry shader.
+ /// </summary>
+ public byte GeometryVerticesPerPrimitive;
+ /// <summary>
+ /// Maximum number of vertices that a geometry shader may generate.
+ /// </summary>
+ public ushort GeometryMaxOutputVertices;
+ /// <summary>
+ /// Number of invocations per primitive on tessellation or geometry shaders.
+ /// </summary>
+ public ushort ThreadsPerInputPrimitive;
+ /// <summary>
/// Indicates if the fragment shader accesses the fragment coordinate built-in variable.
/// </summary>
public bool UsesFragCoord;
@@ -783,9 +798,10 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
- ShaderIdentification.None,
- 0,
+ dataInfo.GeometryVerticesPerPrimitive,
+ dataInfo.GeometryMaxOutputVertices,
+ dataInfo.ThreadsPerInputPrimitive,
@@ -813,6 +829,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
TexturesCount = (ushort)info.Textures.Count,
ImagesCount = (ushort)info.Images.Count,
Stage = info.Stage,
+ GeometryVerticesPerPrimitive = (byte)info.GeometryVerticesPerPrimitive,
+ GeometryMaxOutputVertices = (ushort)info.GeometryMaxOutputVertices,
+ ThreadsPerInputPrimitive = (ushort)info.ThreadsPerInputPrimitive,
UsesFragCoord = info.UsesFragCoord,
UsesInstanceId = info.UsesInstanceId,
UsesDrawParameters = info.UsesDrawParameters,
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
index 8c2108bf..153fc442 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
@@ -595,6 +595,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
ResourceCounts counts = new();
+ DiskCacheGpuAccessor[] gpuAccessors = new DiskCacheGpuAccessor[Constants.ShaderStages];
TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
TranslatorContext nextStage = null;
@@ -626,14 +627,22 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0);
+ gpuAccessors[stageIndex] = gpuAccessor;
translatorContexts[stageIndex + 1] = currentStage;
nextStage = currentStage;
- if (!_context.Capabilities.SupportsGeometryShader)
+ bool hasGeometryShader = translatorContexts[4] != null;
+ bool vertexHasStore = translatorContexts[1] != null && translatorContexts[1].HasStore;
+ bool geometryHasStore = hasGeometryShader && translatorContexts[4].HasStore;
+ bool vertexToCompute = ShouldConvertVertexToCompute(_context, vertexHasStore, geometryHasStore, hasGeometryShader);
+ // We don't support caching shader stages that have been converted to compute currently,
+ // so just eliminate them if they exist in the cache.
+ if (vertexToCompute)
- ShaderCache.TryRemoveGeometryStage(translatorContexts);
+ return;
CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
@@ -647,6 +656,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
if (currentStage != null)
+ gpuAccessors[stageIndex].InitializeReservedCounts(specState.TransformFeedbackDescriptors != null, vertexToCompute);
ShaderProgram program;
byte[] guestCode = guestShaders[stageIndex + 1].Value.Code;
@@ -701,6 +712,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
ResourceCounts counts = new();
ShaderSpecializationState newSpecState = new(ref specState.ComputeState);
DiskCacheGpuAccessor gpuAccessor = new(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0);
+ gpuAccessor.InitializeReservedCounts(tfEnabled: false, vertexAsCompute: false);
TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, 0);
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
index b4f4a439..1d84d0e4 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
@@ -25,11 +25,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="channel">GPU channel</param>
/// <param name="state">Current GPU state</param>
/// <param name="stageIndex">Graphics shader stage index (0 = Vertex, 4 = Fragment)</param>
- public GpuAccessor(
- GpuContext context,
- GpuChannel channel,
- GpuAccessorState state,
- int stageIndex) : base(context, state.ResourceCounts, stageIndex, state.TransformFeedbackDescriptors != null)
+ public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state, int stageIndex) : base(context, state.ResourceCounts, stageIndex)
_isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
_channel = channel;
@@ -49,7 +45,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="context">GPU context</param>
/// <param name="channel">GPU channel</param>
/// <param name="state">Current GPU state</param>
- public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0, false)
+ public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0)
_channel = channel;
_state = state;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
index 52193940..9d030cd6 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -15,8 +15,10 @@ namespace Ryujinx.Graphics.Gpu.Shader
private readonly ResourceCounts _resourceCounts;
private readonly int _stageIndex;
- private readonly int _reservedConstantBuffers;
- private readonly int _reservedStorageBuffers;
+ private int _reservedConstantBuffers;
+ private int _reservedStorageBuffers;
+ private int _reservedTextures;
+ private int _reservedImages;
/// <summary>
/// Creates a new GPU accessor.
@@ -24,15 +26,26 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="context">GPU context</param>
/// <param name="resourceCounts">Counter of GPU resources used by the shader</param>
/// <param name="stageIndex">Index of the shader stage, 0 for compute</param>
- /// <param name="tfEnabled">Indicates if the current graphics shader is used with transform feedback enabled</param>
- public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex, bool tfEnabled)
+ public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex)
_context = context;
_resourceCounts = resourceCounts;
_stageIndex = stageIndex;
+ }
+ /// <summary>
+ /// Initializes counts for bindings that will be reserved for emulator use.
+ /// </summary>
+ /// <param name="tfEnabled">Indicates if the current graphics shader is used with transform feedback enabled</param>
+ /// <param name="vertexAsCompute">Indicates that the vertex shader will be emulated on a compute shader</param>
+ public void InitializeReservedCounts(bool tfEnabled, bool vertexAsCompute)
+ {
+ ResourceReservationCounts rrc = new(!_context.Capabilities.SupportsTransformFeedback && tfEnabled, vertexAsCompute);
- _reservedConstantBuffers = 1; // For the support buffer.
- _reservedStorageBuffers = !context.Capabilities.SupportsTransformFeedback && tfEnabled ? 5 : 0;
+ _reservedConstantBuffers = rrc.ReservedConstantBuffers;
+ _reservedStorageBuffers = rrc.ReservedStorageBuffers;
+ _reservedTextures = rrc.ReservedTextures;
+ _reservedImages = rrc.ReservedImages;
public int QueryBindingConstantBuffer(int index)
@@ -69,6 +82,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
public int QueryBindingTexture(int index, bool isBuffer)
+ int binding;
if (_context.Capabilities.Api == TargetApi.Vulkan)
if (isBuffer)
@@ -76,16 +91,20 @@ namespace Ryujinx.Graphics.Gpu.Shader
index += (int)_context.Capabilities.MaximumTexturesPerStage;
- return GetBindingFromIndex(index, _context.Capabilities.MaximumTexturesPerStage * 2, "Texture");
+ binding = GetBindingFromIndex(index, _context.Capabilities.MaximumTexturesPerStage * 2, "Texture");
- return _resourceCounts.TexturesCount++;
+ binding = _resourceCounts.TexturesCount++;
+ return binding + _reservedTextures;
public int QueryBindingImage(int index, bool isBuffer)
+ int binding;
if (_context.Capabilities.Api == TargetApi.Vulkan)
if (isBuffer)
@@ -93,12 +112,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
index += (int)_context.Capabilities.MaximumImagesPerStage;
- return GetBindingFromIndex(index, _context.Capabilities.MaximumImagesPerStage * 2, "Image");
+ binding = GetBindingFromIndex(index, _context.Capabilities.MaximumImagesPerStage * 2, "Image");
- return _resourceCounts.ImagesCount++;
+ binding = _resourceCounts.ImagesCount++;
+ return binding + _reservedImages;
private int GetBindingFromIndex(int index, uint maxPerStage, string resourceName)
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderAsCompute.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderAsCompute.cs
new file mode 100644
index 00000000..71540a13
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderAsCompute.cs
@@ -0,0 +1,20 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+namespace Ryujinx.Graphics.Gpu.Shader
+ class ShaderAsCompute
+ {
+ public IProgram HostProgram { get; }
+ public ShaderProgramInfo Info { get; }
+ public ResourceReservations Reservations { get; }
+ public ShaderAsCompute(IProgram hostProgram, ShaderProgramInfo info, ResourceReservations reservations)
+ {
+ HostProgram = hostProgram;
+ Info = info;
+ Reservations = reservations;
+ }
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index 97d7a720..38be262a 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -215,9 +215,10 @@ namespace Ryujinx.Graphics.Gpu.Shader
ShaderSpecializationState specState = new(ref computeState);
GpuAccessorState gpuAccessorState = new(poolState, computeState, default, specState);
GpuAccessor gpuAccessor = new(_context, channel, gpuAccessorState);
+ gpuAccessor.InitializeReservedCounts(tfEnabled: false, vertexAsCompute: false);
TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, gpuVa);
- TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);
+ TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode, asCompute: false);
ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) };
ShaderInfo info = ShaderInfoBuilder.BuildForCompute(_context, translatedShader.Program.Info);
@@ -321,6 +322,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
+ GpuAccessor[] gpuAccessors = new GpuAccessor[Constants.ShaderStages];
TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
TranslatorContext nextStage = null;
@@ -345,22 +347,31 @@ namespace Ryujinx.Graphics.Gpu.Shader
translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA);
+ gpuAccessors[stageIndex] = gpuAccessor;
translatorContexts[stageIndex + 1] = currentStage;
nextStage = currentStage;
- if (!_context.Capabilities.SupportsGeometryShader)
- {
- TryRemoveGeometryStage(translatorContexts);
- }
+ bool hasGeometryShader = translatorContexts[4] != null;
+ bool vertexHasStore = translatorContexts[1] != null && translatorContexts[1].HasStore;
+ bool geometryHasStore = hasGeometryShader && translatorContexts[4].HasStore;
+ bool vertexToCompute = ShouldConvertVertexToCompute(_context, vertexHasStore, geometryHasStore, hasGeometryShader);
+ bool geometryToCompute = ShouldConvertGeometryToCompute(_context, geometryHasStore);
CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];
List<ShaderSource> shaderSources = new();
TranslatorContext previousStage = null;
+ ShaderInfoBuilder infoBuilder = new(_context, transformFeedbackDescriptors != null, vertexToCompute);
+ if (geometryToCompute && translatorContexts[4] != null)
+ {
+ translatorContexts[4].SetVertexOutputMapForGeometryAsCompute(translatorContexts[1]);
+ }
- ShaderInfoBuilder infoBuilder = new(_context, transformFeedbackDescriptors != null);
+ ShaderAsCompute vertexAsCompute = null;
+ ShaderAsCompute geometryAsCompute = null;
for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
@@ -368,8 +379,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
if (currentStage != null)
+ gpuAccessors[stageIndex].InitializeReservedCounts(transformFeedbackDescriptors != null, vertexToCompute);
ShaderProgram program;
+ bool asCompute = (stageIndex == 0 && vertexToCompute) || (stageIndex == 3 && geometryToCompute);
if (stageIndex == 0 && translatorContexts[0] != null)
TranslatedShaderVertexPair translatedShader = TranslateShader(
@@ -378,7 +393,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
- cachedGuestCode.VertexBCode);
+ cachedGuestCode.VertexBCode,
+ asCompute);
shaders[0] = translatedShader.VertexA;
shaders[1] = translatedShader.VertexB;
@@ -388,12 +404,31 @@ namespace Ryujinx.Graphics.Gpu.Shader
byte[] code = cachedGuestCode.GetByIndex(stageIndex);
- TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code);
+ TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code, asCompute);
shaders[stageIndex + 1] = translatedShader.Shader;
program = translatedShader.Program;
+ if (asCompute)
+ {
+ bool tfEnabled = transformFeedbackDescriptors != null;
+ if (stageIndex == 0)
+ {
+ vertexAsCompute = CreateHostVertexAsComputeProgram(program, currentStage, tfEnabled);
+ TranslatorContext lastInVertexPipeline = geometryToCompute ? translatorContexts[4] ?? currentStage : currentStage;
+ program = lastInVertexPipeline.GenerateVertexPassthroughForCompute();
+ }
+ else
+ {
+ geometryAsCompute = CreateHostVertexAsComputeProgram(program, currentStage, tfEnabled);
+ program = null;
+ }
+ }
if (program != null)
@@ -418,46 +453,81 @@ namespace Ryujinx.Graphics.Gpu.Shader
IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, info);
- gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);
+ gpShaders = new(hostProgram, vertexAsCompute, geometryAsCompute, specState, shaders);
- EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray);
+ // We don't currently support caching shaders that have been converted to compute.
+ if (vertexAsCompute == null)
+ {
+ EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray);
+ }
_gpPrograms[addresses] = gpShaders;
return gpShaders;
/// <summary>
- /// Tries to eliminate the geometry stage from the array of translator contexts.
+ /// Checks if a vertex shader should be converted to a compute shader due to it making use of
+ /// features that are not supported on the host.
/// </summary>
- /// <param name="translatorContexts">Array of translator contexts</param>
- public static void TryRemoveGeometryStage(TranslatorContext[] translatorContexts)
+ /// <param name="context">GPU context of the shader</param>
+ /// <param name="vertexHasStore">Whether the vertex shader has image or storage buffer store operations</param>
+ /// <param name="geometryHasStore">Whether the geometry shader has image or storage buffer store operations, if one exists</param>
+ /// <param name="hasGeometryShader">Whether a geometry shader exists</param>
+ /// <returns>True if the vertex shader should be converted to compute, false otherwise</returns>
+ public static bool ShouldConvertVertexToCompute(GpuContext context, bool vertexHasStore, bool geometryHasStore, bool hasGeometryShader)
- if (translatorContexts[4] != null)
+ // If the host does not support store operations on vertex,
+ // we need to emulate it on a compute shader.
+ if (!context.Capabilities.SupportsVertexStoreAndAtomics && vertexHasStore)
- // We have a geometry shader, but geometry shaders are not supported.
- // Try to eliminate the geometry shader.
+ return true;
+ }
- ShaderProgramInfo info = translatorContexts[4].Translate().Info;
+ // If any stage after the vertex stage is converted to compute,
+ // we need to convert vertex to compute too.
+ return hasGeometryShader && ShouldConvertGeometryToCompute(context, geometryHasStore);
+ }
- if (info.Identification == ShaderIdentification.GeometryLayerPassthrough)
- {
- // We managed to identify that this geometry shader is only used to set the output Layer value,
- // we can set the Layer on the previous stage instead (usually the vertex stage) and eliminate it.
+ /// <summary>
+ /// Checks if a geometry shader should be converted to a compute shader due to it making use of
+ /// features that are not supported on the host.
+ /// </summary>
+ /// <param name="context">GPU context of the shader</param>
+ /// <param name="geometryHasStore">Whether the geometry shader has image or storage buffer store operations, if one exists</param>
+ /// <returns>True if the geometry shader should be converted to compute, false otherwise</returns>
+ public static bool ShouldConvertGeometryToCompute(GpuContext context, bool geometryHasStore)
+ {
+ return (!context.Capabilities.SupportsVertexStoreAndAtomics && geometryHasStore) ||
+ !context.Capabilities.SupportsGeometryShader;
+ }
- for (int i = 3; i >= 1; i--)
- {
- if (translatorContexts[i] != null)
- {
- translatorContexts[i].SetGeometryShaderLayerInputAttribute(info.GpLayerInputAttribute);
- translatorContexts[i].SetLastInVertexPipeline();
- break;
- }
- }
+ /// <summary>
+ /// Checks if it might be necessary for any vertex, tessellation or geometry shader to be converted to compute,
+ /// based on the supported host features.
+ /// </summary>
+ /// <param name="capabilities">Host capabilities</param>
+ /// <returns>True if the possibility of a shader being converted to compute exists, false otherwise</returns>
+ public static bool MayConvertVtgToCompute(ref Capabilities capabilities)
+ {
+ return !capabilities.SupportsVertexStoreAndAtomics || !capabilities.SupportsGeometryShader;
+ }
- translatorContexts[4] = null;
- }
- }
+ /// <summary>
+ /// Creates a compute shader from a vertex, tessellation or geometry shader that has been converted to compute.
+ /// </summary>
+ /// <param name="program">Shader program</param>
+ /// <param name="context">Translation context of the shader</param>
+ /// <param name="tfEnabled">Whether transform feedback is enabled</param>
+ /// <returns>Compute shader</returns>
+ private ShaderAsCompute CreateHostVertexAsComputeProgram(ShaderProgram program, TranslatorContext context, bool tfEnabled)
+ {
+ ShaderSource source = new(program.Code, program.BinaryCode, ShaderStage.Compute, program.Language);
+ ShaderInfo info = ShaderInfoBuilder.BuildForVertexAsCompute(_context, program.Info, tfEnabled);
+ return new(_context.Renderer.CreateProgram(new[] { source }, info), program.Info, context.GetResourceReservations());
/// <summary>
@@ -573,9 +643,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
+ bool vertexAsCompute = gpShaders.VertexAsCompute != null;
bool usesDrawParameters = gpShaders.Shaders[1]?.Info.UsesDrawParameters ?? false;
- return gpShaders.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true);
+ return gpShaders.SpecializationState.MatchesGraphics(
+ channel,
+ ref poolState,
+ ref graphicsState,
+ vertexAsCompute,
+ usesDrawParameters,
+ checkTextures: true);
/// <summary>
@@ -636,6 +713,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="vertexA">Optional translator context of the shader that should be combined</param>
/// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, if present</param>
/// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache</param>
+ /// <param name="asCompute">Indicates that the vertex shader should be converted to a compute shader</param>
/// <returns>Compiled graphics shader code</returns>
private static TranslatedShaderVertexPair TranslateShader(
ShaderDumper dumper,
@@ -643,7 +721,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
TranslatorContext currentStage,
TranslatorContext vertexA,
byte[] codeA,
- byte[] codeB)
+ byte[] codeB,
+ bool asCompute)
ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1);
@@ -663,7 +742,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
pathsB = dumper.Dump(codeB, compute: false);
- ShaderProgram program = currentStage.Translate(vertexA);
+ ShaderProgram program = currentStage.Translate(vertexA, asCompute);
@@ -681,8 +760,9 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="channel">GPU channel using the shader</param>
/// <param name="context">Translator context of the stage to be translated</param>
/// <param name="code">Optional Maxwell binary code of the current stage shader, if present on cache</param>
+ /// <param name="asCompute">Indicates that the shader stage (vertex or geometry) should be converted to a compute shader</param>
/// <returns>Compiled graphics shader code</returns>
- private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code)
+ private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code, bool asCompute)
var memoryManager = channel.MemoryManager;
@@ -694,7 +774,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray();
ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? default;
- ShaderProgram program = context.Translate();
+ ShaderProgram program = context.Translate(asCompute);
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs
index af1e1ee3..bea916a6 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs
@@ -33,6 +33,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
private readonly int _reservedConstantBuffers;
private readonly int _reservedStorageBuffers;
+ private readonly int _reservedTextures;
+ private readonly int _reservedImages;
private readonly List<ResourceDescriptor>[] _resourceDescriptors;
private readonly List<ResourceUsage>[] _resourceUsages;
@@ -42,7 +44,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary>
/// <param name="context">GPU context that owns the shaders that will be added to the builder</param>
/// <param name="tfEnabled">Indicates if the graphics shader is used with transform feedback enabled</param>
- public ShaderInfoBuilder(GpuContext context, bool tfEnabled)
+ /// <param name="vertexAsCompute">Indicates that the vertex shader will be emulated on a compute shader</param>
+ public ShaderInfoBuilder(GpuContext context, bool tfEnabled, bool vertexAsCompute = false)
_context = context;
@@ -60,27 +63,34 @@ namespace Ryujinx.Graphics.Gpu.Shader
AddDescriptor(SupportBufferStages, ResourceType.UniformBuffer, UniformSetIndex, 0, 1);
AddUsage(SupportBufferStages, ResourceType.UniformBuffer, ResourceAccess.Read, UniformSetIndex, 0, 1);
- _reservedConstantBuffers = 1; // For the support buffer.
+ ResourceReservationCounts rrc = new(!context.Capabilities.SupportsTransformFeedback && tfEnabled, vertexAsCompute);
- if (!context.Capabilities.SupportsTransformFeedback && tfEnabled)
- {
- _reservedStorageBuffers = 5;
+ _reservedConstantBuffers = rrc.ReservedConstantBuffers;
+ _reservedStorageBuffers = rrc.ReservedStorageBuffers;
+ _reservedTextures = rrc.ReservedTextures;
+ _reservedImages = rrc.ReservedImages;
- AddDescriptor(VtgStages, ResourceType.StorageBuffer, StorageSetIndex, 0, 5);
- AddUsage(VtgStages, ResourceType.StorageBuffer, ResourceAccess.Read, StorageSetIndex, 0, 1);
- AddUsage(VtgStages, ResourceType.StorageBuffer, ResourceAccess.Write, StorageSetIndex, 1, 4);
- }
- else
- {
- _reservedStorageBuffers = 0;
- }
+ // TODO: Handle this better? Maybe we should only set the bindings that are really needed on each shader.
+ ResourceStages stages = vertexAsCompute ? ResourceStages.Compute | ResourceStages.Vertex : VtgStages;
+ PopulateDescriptorAndUsages(stages, ResourceType.UniformBuffer, ResourceAccess.Read, UniformSetIndex, 1, rrc.ReservedConstantBuffers - 1);
+ PopulateDescriptorAndUsages(stages, ResourceType.StorageBuffer, ResourceAccess.ReadWrite, StorageSetIndex, 0, rrc.ReservedStorageBuffers);
+ PopulateDescriptorAndUsages(stages, ResourceType.BufferTexture, ResourceAccess.Read, TextureSetIndex, 0, rrc.ReservedTextures);
+ PopulateDescriptorAndUsages(stages, ResourceType.BufferImage, ResourceAccess.ReadWrite, ImageSetIndex, 0, rrc.ReservedImages);
+ }
+ private void PopulateDescriptorAndUsages(ResourceStages stages, ResourceType type, ResourceAccess access, int setIndex, int start, int count)
+ {
+ AddDescriptor(stages, type, setIndex, start, count);
+ AddUsage(stages, type, access, setIndex, start, count);
/// <summary>
/// Adds information from a given shader stage.
/// </summary>
/// <param name="info">Shader stage information</param>
- public void AddStageInfo(ShaderProgramInfo info)
+ /// <param name="vertexAsCompute">True if the shader stage has been converted into a compute shader</param>
+ public void AddStageInfo(ShaderProgramInfo info, bool vertexAsCompute = false)
if (info.Stage == ShaderStage.Fragment)
@@ -96,7 +106,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
_ => 0,
- ResourceStages stages = info.Stage switch
+ ResourceStages stages = vertexAsCompute ? ResourceStages.Compute : info.Stage switch
ShaderStage.Compute => ResourceStages.Compute,
ShaderStage.Vertex => ResourceStages.Vertex,
@@ -114,8 +124,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
int uniformBinding = _reservedConstantBuffers + stageIndex * uniformsPerStage;
int storageBinding = _reservedStorageBuffers + stageIndex * storagesPerStage;
- int textureBinding = stageIndex * texturesPerStage * 2;
- int imageBinding = stageIndex * imagesPerStage * 2;
+ int textureBinding = _reservedTextures + stageIndex * texturesPerStage * 2;
+ int imageBinding = _reservedImages + stageIndex * imagesPerStage * 2;
AddDescriptor(stages, ResourceType.UniformBuffer, UniformSetIndex, uniformBinding, uniformsPerStage);
AddDescriptor(stages, ResourceType.StorageBuffer, StorageSetIndex, storageBinding, storagesPerStage);
@@ -285,11 +295,28 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <returns>Shader information</returns>
public static ShaderInfo BuildForCompute(GpuContext context, ShaderProgramInfo info, bool fromCache = false)
- ShaderInfoBuilder builder = new(context, tfEnabled: false);
+ ShaderInfoBuilder builder = new(context, tfEnabled: false, vertexAsCompute: false);
return builder.Build(null, fromCache);
+ /// <summary>
+ /// Builds shader information for a vertex or geometry shader that was converted to a compute shader.
+ /// </summary>
+ /// <param name="context">GPU context that owns the shader</param>
+ /// <param name="info">Compute shader information</param>
+ /// <param name="tfEnabled">Indicates if the graphics shader is used with transform feedback enabled</param>
+ /// <param name="fromCache">True if the compute shader comes from a disk cache, false otherwise</param>
+ /// <returns>Shader information</returns>
+ public static ShaderInfo BuildForVertexAsCompute(GpuContext context, ShaderProgramInfo info, bool tfEnabled, bool fromCache = false)
+ {
+ ShaderInfoBuilder builder = new(context, tfEnabled, vertexAsCompute: true);
+ builder.AddStageInfo(info, vertexAsCompute: true);
+ return builder.Build(null, fromCache);
+ }
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
index e57e1df1..3c2f0b9b 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs
@@ -35,9 +35,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
foreach (var entry in _entries)
+ bool vertexAsCompute = entry.VertexAsCompute != null;
bool usesDrawParameters = entry.Shaders[1]?.Info.UsesDrawParameters ?? false;
- if (entry.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true))
+ if (entry.SpecializationState.MatchesGraphics(
+ channel,
+ ref poolState,
+ ref graphicsState,
+ vertexAsCompute,
+ usesDrawParameters,
+ checkTextures: true))
program = entry;
return true;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
index fcd95375..a41f761b 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs
@@ -457,6 +457,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="graphicsState">Graphics state</param>
+ /// <param name="vertexAsCompute">Indicates that the vertex shader has been converted into a compute shader</param>
/// <param name="usesDrawParameters">Indicates whether the vertex shader accesses draw parameters</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <returns>True if the state matches, false otherwise</returns>
@@ -464,6 +465,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
GpuChannel channel,
ref GpuChannelPoolState poolState,
ref GpuChannelGraphicsState graphicsState,
+ bool vertexAsCompute,
bool usesDrawParameters,
bool checkTextures)
@@ -497,9 +499,25 @@ namespace Ryujinx.Graphics.Gpu.Shader
return false;
- if (!graphicsState.AttributeTypes.AsSpan().SequenceEqual(GraphicsState.AttributeTypes.AsSpan()))
+ if (ShaderCache.MayConvertVtgToCompute(ref channel.Capabilities) && !vertexAsCompute)
- return false;
+ for (int index = 0; index < graphicsState.AttributeTypes.Length; index++)
+ {
+ AttributeType lType = FilterAttributeType(channel, graphicsState.AttributeTypes[index]);
+ AttributeType rType = FilterAttributeType(channel, GraphicsState.AttributeTypes[index]);
+ if (lType != rType)
+ {
+ return false;
+ }
+ }
+ }
+ else
+ {
+ if (!graphicsState.AttributeTypes.AsSpan().SequenceEqual(GraphicsState.AttributeTypes.AsSpan()))
+ {
+ return false;
+ }
if (usesDrawParameters && graphicsState.HasConstantBufferDrawParameters != GraphicsState.HasConstantBufferDrawParameters)
@@ -530,6 +548,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
return Matches(channel, ref poolState, checkTextures, isCompute: false);
+ private static AttributeType FilterAttributeType(GpuChannel channel, AttributeType type)
+ {
+ type &= ~(AttributeType.Packed | AttributeType.PackedRgb10A2Signed);
+ if (channel.Capabilities.SupportsScaledVertexFormats &&
+ (type == AttributeType.Sscaled || type == AttributeType.Uscaled))
+ {
+ type = AttributeType.Float;
+ }
+ return type;
+ }
/// <summary>
/// Checks if the recorded state matches the current GPU compute engine state.
/// </summary>