From f09bba82b9366e5912b639a610ae89cbb1cf352c Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Tue, 29 Aug 2023 21:10:34 -0300
Subject: Geometry shader emulation for macOS (#5551)

* Implement vertex and geometry shader conversion to compute

* Call InitializeReservedCounts for compute too

* PR feedback

* Set clip distance mask for geometry and tessellation shaders too

* Transform feedback emulation only for vertex
---
 src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 156 +++++++++++++++++++------
 1 file changed, 118 insertions(+), 38 deletions(-)

(limited to 'src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs')

diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index 97d7a720..38be262a 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -215,9 +215,10 @@ namespace Ryujinx.Graphics.Gpu.Shader
             ShaderSpecializationState specState = new(ref computeState);
             GpuAccessorState gpuAccessorState = new(poolState, computeState, default, specState);
             GpuAccessor gpuAccessor = new(_context, channel, gpuAccessorState);
+            gpuAccessor.InitializeReservedCounts(tfEnabled: false, vertexAsCompute: false);
 
             TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, gpuVa);
-            TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);
+            TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode, asCompute: false);
 
             ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) };
             ShaderInfo info = ShaderInfoBuilder.BuildForCompute(_context, translatedShader.Program.Info);
@@ -321,6 +322,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
 
+            GpuAccessor[] gpuAccessors = new GpuAccessor[Constants.ShaderStages];
             TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
             TranslatorContext nextStage = null;
 
@@ -345,22 +347,31 @@ namespace Ryujinx.Graphics.Gpu.Shader
                         translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA);
                     }
 
+                    gpuAccessors[stageIndex] = gpuAccessor;
                     translatorContexts[stageIndex + 1] = currentStage;
                     nextStage = currentStage;
                 }
             }
 
-            if (!_context.Capabilities.SupportsGeometryShader)
-            {
-                TryRemoveGeometryStage(translatorContexts);
-            }
+            bool hasGeometryShader = translatorContexts[4] != null;
+            bool vertexHasStore = translatorContexts[1] != null && translatorContexts[1].HasStore;
+            bool geometryHasStore = hasGeometryShader && translatorContexts[4].HasStore;
+            bool vertexToCompute = ShouldConvertVertexToCompute(_context, vertexHasStore, geometryHasStore, hasGeometryShader);
+            bool geometryToCompute = ShouldConvertGeometryToCompute(_context, geometryHasStore);
 
             CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];
             List<ShaderSource> shaderSources = new();
 
             TranslatorContext previousStage = null;
+            ShaderInfoBuilder infoBuilder = new(_context, transformFeedbackDescriptors != null, vertexToCompute);
+
+            if (geometryToCompute && translatorContexts[4] != null)
+            {
+                translatorContexts[4].SetVertexOutputMapForGeometryAsCompute(translatorContexts[1]);
+            }
 
-            ShaderInfoBuilder infoBuilder = new(_context, transformFeedbackDescriptors != null);
+            ShaderAsCompute vertexAsCompute = null;
+            ShaderAsCompute geometryAsCompute = null;
 
             for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
             {
@@ -368,8 +379,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
                 if (currentStage != null)
                 {
+                    gpuAccessors[stageIndex].InitializeReservedCounts(transformFeedbackDescriptors != null, vertexToCompute);
+
                     ShaderProgram program;
 
+                    bool asCompute = (stageIndex == 0 && vertexToCompute) || (stageIndex == 3 && geometryToCompute);
+
                     if (stageIndex == 0 && translatorContexts[0] != null)
                     {
                         TranslatedShaderVertexPair translatedShader = TranslateShader(
@@ -378,7 +393,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
                             currentStage,
                             translatorContexts[0],
                             cachedGuestCode.VertexACode,
-                            cachedGuestCode.VertexBCode);
+                            cachedGuestCode.VertexBCode,
+                            asCompute);
 
                         shaders[0] = translatedShader.VertexA;
                         shaders[1] = translatedShader.VertexB;
@@ -388,12 +404,31 @@ namespace Ryujinx.Graphics.Gpu.Shader
                     {
                         byte[] code = cachedGuestCode.GetByIndex(stageIndex);
 
-                        TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code);
+                        TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code, asCompute);
 
                         shaders[stageIndex + 1] = translatedShader.Shader;
                         program = translatedShader.Program;
                     }
 
+                    if (asCompute)
+                    {
+                        bool tfEnabled = transformFeedbackDescriptors != null;
+
+                        if (stageIndex == 0)
+                        {
+                            vertexAsCompute = CreateHostVertexAsComputeProgram(program, currentStage, tfEnabled);
+
+                            TranslatorContext lastInVertexPipeline = geometryToCompute ? translatorContexts[4] ?? currentStage : currentStage;
+
+                            program = lastInVertexPipeline.GenerateVertexPassthroughForCompute();
+                        }
+                        else
+                        {
+                            geometryAsCompute = CreateHostVertexAsComputeProgram(program, currentStage, tfEnabled);
+                            program = null;
+                        }
+                    }
+
                     if (program != null)
                     {
                         shaderSources.Add(CreateShaderSource(program));
@@ -418,46 +453,81 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, info);
 
-            gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);
+            gpShaders = new(hostProgram, vertexAsCompute, geometryAsCompute, specState, shaders);
 
             _graphicsShaderCache.Add(gpShaders);
-            EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray);
+
+            // We don't currently support caching shaders that have been converted to compute.
+            if (vertexAsCompute == null)
+            {
+                EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray);
+            }
+
             _gpPrograms[addresses] = gpShaders;
 
             return gpShaders;
         }
 
         /// <summary>
-        /// Tries to eliminate the geometry stage from the array of translator contexts.
+        /// Checks if a vertex shader should be converted to a compute shader due to it making use of
+        /// features that are not supported on the host.
         /// </summary>
-        /// <param name="translatorContexts">Array of translator contexts</param>
-        public static void TryRemoveGeometryStage(TranslatorContext[] translatorContexts)
+        /// <param name="context">GPU context of the shader</param>
+        /// <param name="vertexHasStore">Whether the vertex shader has image or storage buffer store operations</param>
+        /// <param name="geometryHasStore">Whether the geometry shader has image or storage buffer store operations, if one exists</param>
+        /// <param name="hasGeometryShader">Whether a geometry shader exists</param>
+        /// <returns>True if the vertex shader should be converted to compute, false otherwise</returns>
+        public static bool ShouldConvertVertexToCompute(GpuContext context, bool vertexHasStore, bool geometryHasStore, bool hasGeometryShader)
         {
-            if (translatorContexts[4] != null)
+            // If the host does not support store operations on vertex,
+            // we need to emulate it on a compute shader.
+            if (!context.Capabilities.SupportsVertexStoreAndAtomics && vertexHasStore)
             {
-                // We have a geometry shader, but geometry shaders are not supported.
-                // Try to eliminate the geometry shader.
+                return true;
+            }
 
-                ShaderProgramInfo info = translatorContexts[4].Translate().Info;
+            // If any stage after the vertex stage is converted to compute,
+            // we need to convert vertex to compute too.
+            return hasGeometryShader && ShouldConvertGeometryToCompute(context, geometryHasStore);
+        }
 
-                if (info.Identification == ShaderIdentification.GeometryLayerPassthrough)
-                {
-                    // We managed to identify that this geometry shader is only used to set the output Layer value,
-                    // we can set the Layer on the previous stage instead (usually the vertex stage) and eliminate it.
+        /// <summary>
+        /// Checks if a geometry shader should be converted to a compute shader due to it making use of
+        /// features that are not supported on the host.
+        /// </summary>
+        /// <param name="context">GPU context of the shader</param>
+        /// <param name="geometryHasStore">Whether the geometry shader has image or storage buffer store operations, if one exists</param>
+        /// <returns>True if the geometry shader should be converted to compute, false otherwise</returns>
+        public static bool ShouldConvertGeometryToCompute(GpuContext context, bool geometryHasStore)
+        {
+            return (!context.Capabilities.SupportsVertexStoreAndAtomics && geometryHasStore) ||
+                   !context.Capabilities.SupportsGeometryShader;
+        }
 
-                    for (int i = 3; i >= 1; i--)
-                    {
-                        if (translatorContexts[i] != null)
-                        {
-                            translatorContexts[i].SetGeometryShaderLayerInputAttribute(info.GpLayerInputAttribute);
-                            translatorContexts[i].SetLastInVertexPipeline();
-                            break;
-                        }
-                    }
+        /// <summary>
+        /// Checks if it might be necessary for any vertex, tessellation or geometry shader to be converted to compute,
+        /// based on the supported host features.
+        /// </summary>
+        /// <param name="capabilities">Host capabilities</param>
+        /// <returns>True if the possibility of a shader being converted to compute exists, false otherwise</returns>
+        public static bool MayConvertVtgToCompute(ref Capabilities capabilities)
+        {
+            return !capabilities.SupportsVertexStoreAndAtomics || !capabilities.SupportsGeometryShader;
+        }
 
-                    translatorContexts[4] = null;
-                }
-            }
+        /// <summary>
+        /// Creates a compute shader from a vertex, tessellation or geometry shader that has been converted to compute.
+        /// </summary>
+        /// <param name="program">Shader program</param>
+        /// <param name="context">Translation context of the shader</param>
+        /// <param name="tfEnabled">Whether transform feedback is enabled</param>
+        /// <returns>Compute shader</returns>
+        private ShaderAsCompute CreateHostVertexAsComputeProgram(ShaderProgram program, TranslatorContext context, bool tfEnabled)
+        {
+            ShaderSource source = new(program.Code, program.BinaryCode, ShaderStage.Compute, program.Language);
+            ShaderInfo info = ShaderInfoBuilder.BuildForVertexAsCompute(_context, program.Info, tfEnabled);
+
+            return new(_context.Renderer.CreateProgram(new[] { source }, info), program.Info, context.GetResourceReservations());
         }
 
         /// <summary>
@@ -573,9 +643,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 }
             }
 
+            bool vertexAsCompute = gpShaders.VertexAsCompute != null;
             bool usesDrawParameters = gpShaders.Shaders[1]?.Info.UsesDrawParameters ?? false;
 
-            return gpShaders.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true);
+            return gpShaders.SpecializationState.MatchesGraphics(
+                channel,
+                ref poolState,
+                ref graphicsState,
+                vertexAsCompute,
+                usesDrawParameters,
+                checkTextures: true);
         }
 
         /// <summary>
@@ -636,6 +713,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="vertexA">Optional translator context of the shader that should be combined</param>
         /// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, if present</param>
         /// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache</param>
+        /// <param name="asCompute">Indicates that the vertex shader should be converted to a compute shader</param>
         /// <returns>Compiled graphics shader code</returns>
         private static TranslatedShaderVertexPair TranslateShader(
             ShaderDumper dumper,
@@ -643,7 +721,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
             TranslatorContext currentStage,
             TranslatorContext vertexA,
             byte[] codeA,
-            byte[] codeB)
+            byte[] codeB,
+            bool asCompute)
         {
             ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1);
 
@@ -663,7 +742,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 pathsB = dumper.Dump(codeB, compute: false);
             }
 
-            ShaderProgram program = currentStage.Translate(vertexA);
+            ShaderProgram program = currentStage.Translate(vertexA, asCompute);
 
             pathsB.Prepend(program);
             pathsA.Prepend(program);
@@ -681,8 +760,9 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="channel">GPU channel using the shader</param>
         /// <param name="context">Translator context of the stage to be translated</param>
         /// <param name="code">Optional Maxwell binary code of the current stage shader, if present on cache</param>
+        /// <param name="asCompute">Indicates that the vertex shader should be converted to a compute shader</param>
         /// <returns>Compiled graphics shader code</returns>
-        private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code)
+        private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code, bool asCompute)
         {
             var memoryManager = channel.MemoryManager;
 
@@ -694,7 +774,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
             code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray();
 
             ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? default;
-            ShaderProgram program = context.Translate();
+            ShaderProgram program = context.Translate(asCompute);
 
             paths.Prepend(program);
 
-- 
cgit v1.2.3-70-g09d2