aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorriperiperi <rhy3756547@hotmail.com>2023-10-20 14:05:09 +0100
committerGitHub <noreply@github.com>2023-10-20 15:05:09 +0200
commit76b53e018a2e867899dbce2f3ce5173bbc4eed22 (patch)
tree0a47217372edba98d836a31d3980c417df4704a7
parent28dd7d80af56701887dbb538b56aa58edaf39d91 (diff)
GPU: Add fallback when textureGatherOffsets is not supported (#5792)1.1.1054
* GPU: Add fallback when textureGatherOffsets is not supported. This PR adds a fallback for GPUs or APIs that don't support an equivalent to the method `textureGatherOffsets`, where each of the 4 gathered texels has an individual offset. This is done by reusing the existing code to handle non-const offsets for texture instructions, though it has also been corrected as there were a few implementation issues. MoltenVK reports support for this capability, and it didn't error when we initially released the MacOS build, but that has since changed. MVK still reports support, but spirv-cross has been fixed in a way that it _attempts_ to use this capability, but the metal compiler errors since it doesn't exist. Some other fixes: - textureGatherOffsets emulation has been changed significantly. It now uses 4 texture sample instructions (not gather), calculates a base texel (i=0 j=0) and adds the offsets onto it before converting into a tex coord. The final result is offset into a texel center, so it shouldn't be subject to interpolation, though this isn't perfect and could have some error with floating point formats with linear sampling. It is subject to texture wrap mode as it should be, which is why texelFetch was not used. - Maybe gather should be used here with component `w` (i=0, j=0), though this multiplies number of texels fetched by 4... The way it was doing this before _was_ wrong_, but doing it right would avoid issues with texel center precision. - textureGatherOffset (singular) now performs textureGather with the offset applied to the coords, rather than the slower fallback where each texel is fetched individually. * Increment shader cache version, remove unused arg * Use base texture size for gather coord offset. Implicit LOD for gather is not supported. * Use 4 texture gathers for offsets emulation Avoids issues with interpolation at cost of performance (not sure how bad this is) * Address Feedback
-rw-r--r--src/Ryujinx.Graphics.GAL/Capabilities.cs3
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs2
-rw-r--r--src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs2
-rw-r--r--src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs1
-rw-r--r--src/Ryujinx.Graphics.Shader/IGpuAccessor.cs9
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs64
-rw-r--r--src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs1
7 files changed, 71 insertions, 11 deletions
diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs
index 75642204..8959bf93 100644
--- a/src/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -38,6 +38,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsShaderBallot;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
+ public readonly bool SupportsTextureGatherOffsets;
public readonly bool SupportsTextureShadowLod;
public readonly bool SupportsVertexStoreAndAtomics;
public readonly bool SupportsViewportIndexVertexTessellation;
@@ -92,6 +93,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsShaderBallot,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
+ bool supportsTextureGatherOffsets,
bool supportsTextureShadowLod,
bool supportsVertexStoreAndAtomics,
bool supportsViewportIndexVertexTessellation,
@@ -142,6 +144,7 @@ namespace Ryujinx.Graphics.GAL
SupportsShaderBallot = supportsShaderBallot;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
+ SupportsTextureGatherOffsets = supportsTextureGatherOffsets;
SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsVertexStoreAndAtomics = supportsVertexStoreAndAtomics;
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index 0f1aa6a9..0dc4b1a7 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
- private const uint CodeGenVersion = 5767;
+ private const uint CodeGenVersion = 5791;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
index 9d030cd6..a5b31363 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -186,6 +186,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
+ public bool QueryHostSupportsTextureGatherOffsets() => _context.Capabilities.SupportsTextureGatherOffsets;
+
public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;
public bool QueryHostSupportsTransformFeedback() => _context.Capabilities.SupportsTransformFeedback;
diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
index 3eba15e3..667ea782 100644
--- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -163,6 +163,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true,
+ supportsTextureGatherOffsets: true,
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
supportsVertexStoreAndAtomics: true,
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index 4dc75a3e..29a5435e 100644
--- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -340,6 +340,15 @@ namespace Ryujinx.Graphics.Shader
}
/// <summary>
+ /// Queries host GPU texture gather with multiple offsets support.
+ /// </summary>
+ /// <returns>True if the GPU and driver supports texture gather offsets, false otherwise</returns>
+ bool QueryHostSupportsTextureGatherOffsets()
+ {
+ return true;
+ }
+
+ /// <summary>
/// Queries host GPU texture shadow LOD support.
/// </summary>
/// <returns>True if the GPU and driver supports texture shadow LOD, false otherwise</returns>
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs
index dbfe6269..495ea8a9 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs
@@ -303,7 +303,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
- bool hasInvalidOffset = (hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset();
+ bool needsOffsetsEmulation = hasOffsets && !gpuAccessor.QueryHostSupportsTextureGatherOffsets();
+
+ bool hasInvalidOffset = needsOffsetsEmulation || ((hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset());
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
@@ -402,11 +404,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
offsets[index] = offset;
}
- hasInvalidOffset &= !areAllOffsetsConstant;
-
- if (!hasInvalidOffset)
+ if (!needsOffsetsEmulation)
{
- return node;
+ hasInvalidOffset &= !areAllOffsetsConstant;
+
+ if (!hasInvalidOffset)
+ {
+ return node;
+ }
}
if (hasLodBias)
@@ -434,13 +439,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
LinkedListNode<INode> oldNode = node;
- if (isGather && !isShadow)
+ if (isGather && !isShadow && hasOffsets)
{
Operand[] newSources = new Operand[sources.Length];
sources.CopyTo(newSources, 0);
- Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
+ Operand[] texSizes = InsertTextureBaseSize(node, texOp, bindlessHandle, coordsCount);
int destIndex = 0;
@@ -455,7 +460,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
Operand offset = Local();
- Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)];
+ Operand intOffset = offsets[index + compIndex * coordsCount];
node.List.AddBefore(node, new Operation(
Instruction.FP32 | Instruction.Divide,
@@ -478,7 +483,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
texOp.Format,
texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
texOp.Binding,
- 1,
+ 1 << 3, // W component: i=0, j=0
new[] { dests[destIndex++] },
newSources);
@@ -502,7 +507,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
}
else
{
- Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
+ Operand[] texSizes = isGather
+ ? InsertTextureBaseSize(node, texOp, bindlessHandle, coordsCount)
+ : InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
for (int index = 0; index < coordsCount; index++)
{
@@ -549,6 +556,43 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
return node;
}
+ private static Operand[] InsertTextureBaseSize(
+ LinkedListNode<INode> node,
+ TextureOperation texOp,
+ Operand bindlessHandle,
+ int coordsCount)
+ {
+ Operand[] texSizes = new Operand[coordsCount];
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ texSizes[index] = Local();
+
+ Operand[] texSizeSources;
+
+ if (bindlessHandle != null)
+ {
+ texSizeSources = new Operand[] { bindlessHandle, Const(0) };
+ }
+ else
+ {
+ texSizeSources = new Operand[] { Const(0) };
+ }
+
+ node.List.AddBefore(node, new TextureOperation(
+ Instruction.TextureQuerySize,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.Binding,
+ index,
+ new[] { texSizes[index] },
+ texSizeSources));
+ }
+
+ return texSizes;
+ }
+
private static Operand[] InsertTextureLod(
LinkedListNode<INode> node,
TextureOperation texOp,
diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
index a483dc59..ab8e6137 100644
--- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -605,6 +605,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsShaderBallot: false,
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
+ supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,
supportsTextureShadowLod: false,
supportsVertexStoreAndAtomics: features2.Features.VertexPipelineStoresAndAtomics,
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,