From aa021085cfab10ab63a7e6c2f9c9e29b4111525c Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Fri, 5 May 2023 11:20:20 -0300
Subject: Allow any shader SSBO constant buffer slot and offset (#2237)

* Allow any shader SSBO constant buffer slot and offset

* Fix slot value passed to SetUsedStorageBuffer on fallback case

* Bump shader cache version

* Ensure that the storage buffer source constant buffer offset is word aligned

* Fix FirstBinding on GetUniformBufferDescriptors
---
 .../Engine/Compute/ComputeClass.cs                 |   7 +-
 .../Engine/Threed/StateUpdater.cs                  |   7 +-
 .../Shader/DiskCache/DiskCacheHostStorage.cs       |   2 +-
 src/Ryujinx.Graphics.Shader/BufferDescriptor.cs    |  18 +++-
 src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs    |   2 +-
 .../Translation/GlobalMemory.cs                    |   2 +
 .../Translation/Optimizations/GlobalToStorage.cs   | 104 +++++++++++++++------
 .../Translation/Optimizations/Optimizer.cs         |   1 -
 .../Translation/Rewriter.cs                        |  11 ++-
 .../Translation/ShaderConfig.cs                    |  92 +++++++++++++++++-
 10 files changed, 194 insertions(+), 52 deletions(-)

(limited to 'src')

diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
index 2ac738fd..4ec23c79 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs
@@ -157,11 +157,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
             {
                 BufferDescriptor sb = info.SBuffers[index];
 
-                ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
-
-                int sbDescOffset = 0x310 + sb.Slot * 0x10;
-
-                sbDescAddress += (ulong)sbDescOffset;
+                ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(sb.SbCbSlot);
+                sbDescAddress += (ulong)sb.SbCbOffset * 4;
 
                 SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
 
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
index 00e09a31..1c9bf1d2 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
@@ -351,11 +351,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
                 {
                     BufferDescriptor sb = info.SBuffers[index];
 
-                    ulong sbDescAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(stage, 0);
-
-                    int sbDescOffset = 0x110 + stage * 0x100 + sb.Slot * 0x10;
-
-                    sbDescAddress += (ulong)sbDescOffset;
+                    ulong sbDescAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(stage, sb.SbCbSlot);
+                    sbDescAddress += (ulong)sb.SbCbOffset * 4;
 
                     SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
 
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index b182f299..85233c0a 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private const ushort FileFormatVersionMajor = 1;
         private const ushort FileFormatVersionMinor = 2;
         private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
-        private const uint CodeGenVersion = 4735;
+        private const uint CodeGenVersion = 2237;
 
         private const string SharedTocFileName = "shared.toc";
         private const string SharedDataFileName = "shared.data";
diff --git a/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs b/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs
index 4ce8a896..410c1991 100644
--- a/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs
+++ b/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs
@@ -5,13 +5,27 @@ namespace Ryujinx.Graphics.Shader
         // New fields should be added to the end of the struct to keep disk shader cache compatibility.
 
         public readonly int Binding;
-        public readonly int Slot;
+        public readonly byte Slot;
+        public readonly byte SbCbSlot;
+        public readonly ushort SbCbOffset;
         public BufferUsageFlags Flags;
 
         public BufferDescriptor(int binding, int slot)
         {
             Binding = binding;
-            Slot = slot;
+            Slot = (byte)slot; // NOTE(review): narrowing cast; only the low 8 bits of slot are kept unless the project compiles in checked mode.
+            SbCbSlot = 0; // This overload records no constant buffer location; both SbCb fields are zeroed.
+            SbCbOffset = 0;
+
+            Flags = BufferUsageFlags.None; // Flags is the only non-readonly field and starts out empty.
+        }
+
+        public BufferDescriptor(int binding, int slot, int sbCbSlot, int sbCbOffset) // Overload that also records the constant buffer slot and offset passed by the caller.
+        {
+            Binding = binding;
+            Slot = (byte)slot; // NOTE(review): narrowing cast; only the low 8 bits are kept unless the project compiles in checked mode.
+            SbCbSlot = (byte)sbCbSlot; // Stored as 8 bits.
+            SbCbOffset = (ushort)sbCbOffset; // Stored as 16 bits; the GPU-side readers multiply this value by 4 to form a byte address.
 
             Flags = BufferUsageFlags.None;
         }
diff --git a/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs b/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs
index 657546cb..ab81d575 100644
--- a/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs
+++ b/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs
@@ -6,7 +6,7 @@ namespace Ryujinx.Graphics.Shader
     /// Flags that indicate how a buffer will be used in a shader.
     /// </summary>
     [Flags]
-    public enum BufferUsageFlags
+    public enum BufferUsageFlags : byte
     {
         None = 0,
 
diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
index 774a128d..a81d0fc4 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
@@ -16,6 +16,8 @@ namespace Ryujinx.Graphics.Shader.Translation
         public const int UbeDescsSize  = StorageDescSize * UbeMaxCount;
         public const int UbeFirstCbuf  = 8;
 
+        public const int DriverReservedCb = 0;
+
         public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind)
         {
             return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) ||
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
index 2a4070e0..a8368244 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
@@ -8,6 +8,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
 {
     static class GlobalToStorage
     {
+        private struct SearchResult // Constant buffer slot and offset produced by SearchForStorageBase and GetStorageIndex.
+        {
+            public static SearchResult NotFound => new SearchResult(-1, 0); // Sentinel value: a slot of -1 means "no match".
+            public bool Found => SbCbSlot != -1; // False only for the NotFound sentinel (or any other value built with slot -1).
+            public int SbCbSlot { get; } // Constant buffer slot, taken from Operand.GetCbufSlot() by GetStorageIndex.
+            public int SbCbOffset { get; } // Constant buffer offset, taken from Operand.GetCbufOffset() by GetStorageIndex.
+
+            public SearchResult(int sbCbSlot, int sbCbOffset) // Stores both values unchanged; no validation is performed.
+            {
+                SbCbSlot = sbCbSlot;
+                SbCbOffset = sbCbOffset;
+            }
+        }
+
         public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
         {
             int sbStart = GetStorageBaseCbOffset(config.Stage);
@@ -49,30 +63,33 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                 {
                     Operand source = operation.GetSource(0);
 
-                    int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);
-
-                    if (storageIndex >= 0)
+                    var result = SearchForStorageBase(config, block, source);
+                    if (!result.Found)
                     {
-                        // Storage buffers are implemented using global memory access.
-                        // If we know from where the base address of the access is loaded,
-                        // we can guess which storage buffer it is accessing.
-                        // We can then replace the global memory access with a storage
-                        // buffer access.
-                        node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
+                        continue;
                     }
-                    else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
+
+                    if (config.Stage == ShaderStage.Compute &&
+                        operation.Inst == Instruction.LoadGlobal &&
+                        result.SbCbSlot == DriverReservedCb &&
+                        result.SbCbOffset >= UbeBaseOffset &&
+                        result.SbCbOffset < UbeBaseOffset + UbeDescsSize)
                     {
                         // Here we effectively try to replace a LDG instruction with LDC.
                         // The hardware only supports a limited amount of constant buffers
                         // so NVN "emulates" more constant buffers using global memory access.
                         // Here we try to replace the global access back to a constant buffer
                         // load.
-                        storageIndex = SearchForStorageBase(block, source, ubeStart, ubeStart + ubeEnd);
-
-                        if (storageIndex >= 0)
-                        {
-                            node = ReplaceLdgWithLdc(node, config, storageIndex);
-                        }
+                        node = ReplaceLdgWithLdc(node, config, (result.SbCbOffset - UbeBaseOffset) / StorageDescSize);
+                    }
+                    else
+                    {
+                        // Storage buffers are implemented using global memory access.
+                        // If we know from where the base address of the access is loaded,
+                        // we can guess which storage buffer it is accessing.
+                        // We can then replace the global memory access with a storage
+                        // buffer access.
+                        node = ReplaceGlobalWithStorage(block, node, config, config.GetSbSlot((byte)result.SbCbSlot, (ushort)result.SbCbOffset));
                     }
                 }
             }
@@ -159,7 +176,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
 
             if (byteOffset == null)
             {
-                Operand baseAddrLow = Cbuf(0, baseAddressCbOffset);
+                (int sbCbSlot, int sbCbOffset) = config.GetSbCbInfo(storageIndex);
+
+                Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset);
                 Operand baseAddrTrunc = Local();
 
                 Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
@@ -360,20 +379,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             return node;
         }
 
-        private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd)
+        private static SearchResult SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress)
         {
             globalAddress = Utils.FindLastOperation(globalAddress, block);
 
             if (globalAddress.Type == OperandType.ConstantBuffer)
             {
-                return GetStorageIndex(globalAddress, sbStart, sbEnd);
+                return GetStorageIndex(config, globalAddress);
             }
 
             Operation operation = globalAddress.AsgOp as Operation;
 
             if (operation == null || operation.Inst != Instruction.Add)
             {
-                return -1;
+                return SearchResult.NotFound;
             }
 
             Operand src1 = operation.GetSource(0);
@@ -382,34 +401,65 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) ||
                 (src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant))
             {
+                Operand baseAddr;
+
                 if (src1.Type == OperandType.LocalVariable)
                 {
-                    operation = Utils.FindLastOperation(src1, block).AsgOp as Operation;
+                    baseAddr = Utils.FindLastOperation(src1, block);
                 }
                 else
                 {
-                    operation = Utils.FindLastOperation(src2, block).AsgOp as Operation;
+                    baseAddr = Utils.FindLastOperation(src2, block);
+                }
+
+                var result = GetStorageIndex(config, baseAddr);
+                if (result.Found)
+                {
+                    return result;
                 }
 
+                operation = baseAddr.AsgOp as Operation;
+
                 if (operation == null || operation.Inst != Instruction.Add)
                 {
-                    return -1;
+                    return SearchResult.NotFound;
                 }
             }
 
+            var selectedResult = SearchResult.NotFound;
+
             for (int index = 0; index < operation.SourcesCount; index++)
             {
                 Operand source = operation.GetSource(index);
 
-                int storageIndex = GetStorageIndex(source, sbStart, sbEnd);
+                var result = GetStorageIndex(config, source);
 
-                if (storageIndex != -1)
+                // If we already have a result, we give preference to the ones from
+                // the driver reserved constant buffer, as those are the ones that
+                // contain the base address.
+                if (result.Found && (!selectedResult.Found || result.SbCbSlot == GlobalMemory.DriverReservedCb))
                 {
-                    return storageIndex;
+                    selectedResult = result;
                 }
             }
 
-            return -1;
+            return selectedResult;
+        }
+
+        private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand) // Returns the constant buffer slot/offset of operand, or NotFound; config is not used by this body.
+        {
+            if (operand.Type == OperandType.ConstantBuffer)
+            {
+                int slot = operand.GetCbufSlot();
+                int offset = operand.GetCbufOffset();
+
+                if ((offset & 3) == 0) // Accept only offsets that are a multiple of 4. NOTE(review): confirm the unit of GetCbufOffset() matches the intended alignment.
+                {
+                    return new SearchResult(slot, offset);
+                }
+            }
+
+            return SearchResult.NotFound; // Either not a constant buffer operand, or its offset failed the alignment check.
+        }
 
         private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
index bae774ee..16848bdc 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
@@ -68,7 +68,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                         }
 
                         ConstantFolding.RunPass(operation);
-
                         Simplification.RunPass(operation);
 
                         if (DestIsLocalVar(operation))
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
index 91e7ace1..8167efc1 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
@@ -110,9 +110,9 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
             {
-                baseAddrLow = Cbuf(0, cbOffset);
-                Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
-                Operand size = Cbuf(0, cbOffset + 2);
+                baseAddrLow          = Cbuf(DriverReservedCb, cbOffset);
+                Operand baseAddrHigh = Cbuf(DriverReservedCb, cbOffset + 1);
+                Operand size         = Cbuf(DriverReservedCb, cbOffset + 2);
 
                 Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
                 Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
@@ -134,9 +134,10 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                 sbUseMask &= ~(1 << slot);
 
-                config.SetUsedStorageBuffer(slot, isWrite);
-
                 int cbOffset = GetStorageCbOffset(config.Stage, slot);
+                slot = config.GetSbSlot(DriverReservedCb, (ushort)cbOffset);
+
+                config.SetUsedStorageBuffer(slot, isWrite);
 
                 Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
 
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
index 22f5a671..ae60bcc6 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -125,6 +125,9 @@ namespace Ryujinx.Graphics.Shader.Translation
         private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
         private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;
 
+        private readonly Dictionary<int, int> _sbSlots;
+        private readonly Dictionary<int, int> _sbSlotsReverse;
+
         private BufferDescriptor[] _cachedConstantBufferDescriptors;
         private BufferDescriptor[] _cachedStorageBufferDescriptors;
         private TextureDescriptor[] _cachedTextureDescriptors;
@@ -152,6 +155,9 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             _usedTextures = new Dictionary<TextureInfo, TextureMeta>();
             _usedImages   = new Dictionary<TextureInfo, TextureMeta>();
+
+            _sbSlots        = new Dictionary<int, int>();
+            _sbSlotsReverse = new Dictionary<int, int>();
         }
 
         public ShaderConfig(
@@ -770,9 +776,8 @@ namespace Ryujinx.Graphics.Shader.Translation
                 usedMask |= (int)GpuAccessor.QueryConstantBufferUse();
             }
 
-            return _cachedConstantBufferDescriptors = GetBufferDescriptors(
+            return _cachedConstantBufferDescriptors = GetUniformBufferDescriptors(
                 usedMask,
-                0,
                 UsedFeatures.HasFlag(FeatureFlags.CbIndexing),
                 out _firstConstantBufferBinding,
                 GpuAccessor.QueryBindingConstantBuffer);
@@ -785,7 +790,7 @@ namespace Ryujinx.Graphics.Shader.Translation
                 return _cachedStorageBufferDescriptors;
             }
 
-            return _cachedStorageBufferDescriptors = GetBufferDescriptors(
+            return _cachedStorageBufferDescriptors = GetStorageBufferDescriptors(
                 _usedStorageBuffers,
                 _usedStorageBuffersWrite,
                 true,
@@ -793,7 +798,48 @@ namespace Ryujinx.Graphics.Shader.Translation
                 GpuAccessor.QueryBindingStorageBuffer);
         }
 
-        private static BufferDescriptor[] GetBufferDescriptors(
+        private static BufferDescriptor[] GetUniformBufferDescriptors(int usedMask, bool isArray, out int firstBinding, Func<int, int> getBindingCallback) // Builds one descriptor per set bit of usedMask, lowest slot first.
+        {
+            firstBinding = 0; // Receives the result of the first getBindingCallback call; stays 0 when usedMask is empty.
+            int lastSlot = -1;
+            bool hasFirstBinding = false;
+            var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];
+
+            for (int i = 0; i < descriptors.Length; i++)
+            {
+                int slot = BitOperations.TrailingZeroCount(usedMask); // Lowest slot whose bit is still set.
+
+                if (isArray)
+                {
+                    // The next array entries also consume bindings, even if they are unused.
+                    for (int j = lastSlot + 1; j < slot; j++)
+                    {
+                        int binding = getBindingCallback(j); // Invoked for every skipped slot; the value is only used to seed firstBinding.
+
+                        if (!hasFirstBinding)
+                        {
+                            firstBinding = binding;
+                            hasFirstBinding = true;
+                        }
+                    }
+                }
+
+                lastSlot = slot;
+                descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot); // Two-argument overload: the SbCb fields are left at zero.
+
+                if (!hasFirstBinding)
+                {
+                    firstBinding = descriptors[i].Binding;
+                    hasFirstBinding = true;
+                }
+
+                usedMask &= ~(1 << slot); // Clear the bit so the next iteration picks the following used slot.
+            }
+
+            return descriptors;
+        }
+
+        private BufferDescriptor[] GetStorageBufferDescriptors(
             int usedMask,
             int writtenMask,
             bool isArray,
@@ -827,7 +873,9 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                 lastSlot = slot;
 
-                descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot);
+                (int sbCbSlot, int sbCbOffset) = GetSbCbInfo(slot);
+
+                descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot, sbCbSlot, sbCbOffset);
 
                 if (!hasFirstBinding)
                 {
@@ -924,6 +972,40 @@ namespace Ryujinx.Graphics.Shader.Translation
             return FindDescriptorIndex(GetImageDescriptors(), texOp);
         }
 
+        public int GetSbSlot(byte sbCbSlot, ushort sbCbOffset) // Maps a (constant buffer slot, offset) pair to a storage buffer slot, allocating a new slot on first use.
+        {
+            int key = PackSbCbInfo(sbCbSlot, sbCbOffset);
+
+            if (!_sbSlots.TryGetValue(key, out int slot))
+            {
+                slot = _sbSlots.Count; // New slots are numbered sequentially, in order of first request. NOTE(review): no upper bound is enforced here.
+                _sbSlots.Add(key, slot);
+                _sbSlotsReverse.Add(slot, key); // Reverse entry lets GetSbCbInfo map the slot back to its key.
+            }
+
+            return slot;
+        }
+
+        public (int, int) GetSbCbInfo(int slot) // Returns the (constant buffer slot, constant buffer offset) pair registered for a storage buffer slot.
+        {
+            if (_sbSlotsReverse.TryGetValue(slot, out int key))
+            {
+                return UnpackSbCbInfo(key);
+            }
+
+            throw new ArgumentException($"Invalid slot {slot}.", nameof(slot)); // Thrown when _sbSlotsReverse has no entry for the slot.
+        }
+
+        private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset) // Combines slot and offset into a single int key: slot shifted left by 16, OR-ed with the offset.
+        {
+            return sbCbOffset | ((int)sbCbSlot << 16); // NOTE(review): the offset is not masked, so it must fit in 16 bits; the visible caller passes a ushort.
+        }
+
+        private static (int, int) UnpackSbCbInfo(int key) // Splits a key built by PackSbCbInfo back into (slot, offset).
+        {
+            return ((byte)(key >> 16), (ushort)key); // Slot is the 8 bits starting at bit 16; offset is the low 16 bits.
+        }
+
         public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
         {
             return new ShaderProgramInfo(
-- 
cgit v1.2.3-70-g09d2