aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Shader/Translation/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Translation/Transforms')
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/DrawParametersReplace.cs93
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/ForcePreciseEnable.cs36
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/ITransformPass.cs11
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedAtomicSignedCas.cs58
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedStoreSmallIntCas.cs57
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs702
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs41
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Transforms/VectorComponentSelect.cs96
8 files changed, 1094 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/DrawParametersReplace.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/DrawParametersReplace.cs
new file mode 100644
index 00000000..9e73013d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/DrawParametersReplace.cs
@@ -0,0 +1,93 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Transforms
+{
+ class DrawParametersReplace : ITransformPass
+ {
+ public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
+ {
+ return stage == ShaderStage.Vertex;
+ }
+
+ public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
+ {
+ Operation operation = (Operation)node.Value;
+
+ if (context.GpuAccessor.QueryHasConstantBufferDrawParameters())
+ {
+ if (ReplaceConstantBufferWithDrawParameters(node, operation))
+ {
+ context.UsedFeatures |= FeatureFlags.DrawParameters;
+ }
+ }
+ else if (HasConstantBufferDrawParameters(operation))
+ {
+ context.UsedFeatures |= FeatureFlags.DrawParameters;
+ }
+
+ return node;
+ }
+
+ private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
+ {
+ Operand GenerateLoad(IoVariable ioVariable)
+ {
+ Operand value = Local();
+ node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, value, Const((int)ioVariable)));
+ return value;
+ }
+
+ bool modified = false;
+
+ for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+ {
+ Operand src = operation.GetSource(srcIndex);
+
+ if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
+ {
+ switch (src.GetCbufOffset())
+ {
+ case Constants.NvnBaseVertexByteOffset / 4:
+ operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseVertex));
+ modified = true;
+ break;
+ case Constants.NvnBaseInstanceByteOffset / 4:
+ operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseInstance));
+ modified = true;
+ break;
+ case Constants.NvnDrawIndexByteOffset / 4:
+ operation.SetSource(srcIndex, GenerateLoad(IoVariable.DrawIndex));
+ modified = true;
+ break;
+ }
+ }
+ }
+
+ return modified;
+ }
+
+ private static bool HasConstantBufferDrawParameters(Operation operation)
+ {
+ for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+ {
+ Operand src = operation.GetSource(srcIndex);
+
+ if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
+ {
+ switch (src.GetCbufOffset())
+ {
+ case Constants.NvnBaseVertexByteOffset / 4:
+ case Constants.NvnBaseInstanceByteOffset / 4:
+ case Constants.NvnDrawIndexByteOffset / 4:
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/ForcePreciseEnable.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/ForcePreciseEnable.cs
new file mode 100644
index 00000000..6b7e1410
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/ForcePreciseEnable.cs
@@ -0,0 +1,36 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Translation.Transforms
+{
+ class ForcePreciseEnable : ITransformPass
+ {
+ public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
+ {
+ return stage == ShaderStage.Fragment && gpuAccessor.QueryHostReducedPrecision();
+ }
+
+ public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
+ {
+ // There are some cases where a small bias is added to values to prevent division by zero.
+ // When operating with reduced precision, it is possible for this bias to get rounded to 0
+ // and cause a division by zero.
+ // To prevent that, we force those operations to be precise even if the host wants
+ // imprecise operations for performance.
+
+ Operation operation = (Operation)node.Value;
+
+ if (operation.Inst == (Instruction.FP32 | Instruction.Divide) &&
+ operation.GetSource(0).Type == OperandType.Constant &&
+ operation.GetSource(0).AsFloat() == 1f &&
+ operation.GetSource(1).AsgOp is Operation addOp &&
+ addOp.Inst == (Instruction.FP32 | Instruction.Add) &&
+ addOp.GetSource(1).Type == OperandType.Constant)
+ {
+ addOp.ForcePrecise = true;
+ }
+
+ return node;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/ITransformPass.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/ITransformPass.cs
new file mode 100644
index 00000000..0a109d1d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/ITransformPass.cs
@@ -0,0 +1,11 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Translation.Transforms
+{
+ interface ITransformPass
+ {
+ abstract static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures);
+ abstract static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedAtomicSignedCas.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedAtomicSignedCas.cs
new file mode 100644
index 00000000..112b3b19
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedAtomicSignedCas.cs
@@ -0,0 +1,58 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation.Optimizations;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Transforms
+{
+ class SharedAtomicSignedCas : ITransformPass
+ {
+ public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
+ {
+ return targetLanguage != TargetLanguage.Spirv && stage == ShaderStage.Compute && usedFeatures.HasFlag(FeatureFlags.SharedMemory);
+ }
+
+ public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
+ {
+ Operation operation = (Operation)node.Value;
+ HelperFunctionName name;
+
+ if (operation.Inst == Instruction.AtomicMaxS32)
+ {
+ name = HelperFunctionName.SharedAtomicMaxS32;
+ }
+ else if (operation.Inst == Instruction.AtomicMinS32)
+ {
+ name = HelperFunctionName.SharedAtomicMinS32;
+ }
+ else
+ {
+ return node;
+ }
+
+ if (operation.StorageKind != StorageKind.SharedMemory)
+ {
+ return node;
+ }
+
+ Operand result = operation.Dest;
+ Operand memoryId = operation.GetSource(0);
+ Operand byteOffset = operation.GetSource(1);
+ Operand value = operation.GetSource(2);
+
+ Debug.Assert(memoryId.Type == OperandType.Constant);
+
+ int functionId = context.Hfm.GetOrCreateFunctionId(name, memoryId.Value);
+
+ Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value };
+
+ LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, result, callArgs));
+
+ Utils.DeleteNode(node, operation);
+
+ return newNode;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedStoreSmallIntCas.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedStoreSmallIntCas.cs
new file mode 100644
index 00000000..e58be0a8
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedStoreSmallIntCas.cs
@@ -0,0 +1,57 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation.Optimizations;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Transforms
+{
+ class SharedStoreSmallIntCas : ITransformPass
+ {
+ public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
+ {
+ return stage == ShaderStage.Compute && usedFeatures.HasFlag(FeatureFlags.SharedMemory);
+ }
+
+ public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
+ {
+ Operation operation = (Operation)node.Value;
+ HelperFunctionName name;
+
+ if (operation.StorageKind == StorageKind.SharedMemory8)
+ {
+ name = HelperFunctionName.SharedStore8;
+ }
+ else if (operation.StorageKind == StorageKind.SharedMemory16)
+ {
+ name = HelperFunctionName.SharedStore16;
+ }
+ else
+ {
+ return node;
+ }
+
+ if (operation.Inst != Instruction.Store)
+ {
+ return node;
+ }
+
+ Operand memoryId = operation.GetSource(0);
+ Operand byteOffset = operation.GetSource(1);
+ Operand value = operation.GetSource(2);
+
+ Debug.Assert(memoryId.Type == OperandType.Constant);
+
+ int functionId = context.Hfm.GetOrCreateFunctionId(name, memoryId.Value);
+
+ Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value };
+
+ LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs));
+
+ Utils.DeleteNode(node, operation);
+
+ return newNode;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs
new file mode 100644
index 00000000..5ceed4b7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs
@@ -0,0 +1,702 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Linq;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Transforms
+{
+ class TexturePass : ITransformPass
+ {
+ public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
+ {
+ return true;
+ }
+
+ public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
+ {
+ if (node.Value is TextureOperation texOp)
+ {
+ node = InsertTexelFetchScale(context.Hfm, node, context.ResourceManager, context.Stage);
+ node = InsertTextureSizeUnscale(context.Hfm, node, context.ResourceManager, context.Stage);
+
+ if (texOp.Inst == Instruction.TextureSample)
+ {
+ node = InsertCoordNormalization(context.Hfm, node, context.ResourceManager, context.GpuAccessor, context.Stage);
+ node = InsertCoordGatherBias(node, context.ResourceManager, context.GpuAccessor);
+ node = InsertConstOffsets(node, context.ResourceManager, context.GpuAccessor);
+
+ if (texOp.Type == SamplerType.TextureBuffer && !context.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat())
+ {
+ node = InsertSnormNormalization(node, context.ResourceManager, context.GpuAccessor);
+ }
+ }
+ }
+
+ return node;
+ }
+
+ private static LinkedListNode<INode> InsertTexelFetchScale(
+ HelperFunctionManager hfm,
+ LinkedListNode<INode> node,
+ ResourceManager resourceManager,
+ ShaderStage stage)
+ {
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+ bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+ int coordsCount = texOp.Type.GetDimensions();
+
+ int coordsIndex = isBindless || isIndexed ? 1 : 0;
+
+ bool isImage = IsImageInstructionWithScale(texOp.Inst);
+
+ if ((texOp.Inst == Instruction.TextureSample || isImage) &&
+ (intCoords || isImage) &&
+ !isBindless &&
+ !isIndexed &&
+ stage.SupportsRenderScale() &&
+ TypeSupportsScale(texOp.Type))
+ {
+ int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TexelFetchScale);
+ int samplerIndex = isImage
+ ? resourceManager.GetTextureDescriptors().Length + resourceManager.FindImageDescriptorIndex(texOp.Binding)
+ : resourceManager.FindTextureDescriptorIndex(texOp.Binding);
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ Operand scaledCoord = Local();
+ Operand[] callArgs;
+
+ if (stage == ShaderStage.Fragment)
+ {
+ callArgs = new Operand[] { Const(functionId), texOp.GetSource(coordsIndex + index), Const(samplerIndex), Const(index) };
+ }
+ else
+ {
+ callArgs = new Operand[] { Const(functionId), texOp.GetSource(coordsIndex + index), Const(samplerIndex) };
+ }
+
+ node.List.AddBefore(node, new Operation(Instruction.Call, 0, scaledCoord, callArgs));
+
+ texOp.SetSource(coordsIndex + index, scaledCoord);
+ }
+ }
+
+ return node;
+ }
+
+ private static LinkedListNode<INode> InsertTextureSizeUnscale(
+ HelperFunctionManager hfm,
+ LinkedListNode<INode> node,
+ ResourceManager resourceManager,
+ ShaderStage stage)
+ {
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+ if (texOp.Inst == Instruction.TextureSize &&
+ texOp.Index < 2 &&
+ !isBindless &&
+ !isIndexed &&
+ stage.SupportsRenderScale() &&
+ TypeSupportsScale(texOp.Type))
+ {
+ int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TextureSizeUnscale);
+ int samplerIndex = resourceManager.FindTextureDescriptorIndex(texOp.Binding);
+
+ for (int index = texOp.DestsCount - 1; index >= 0; index--)
+ {
+ Operand dest = texOp.GetDest(index);
+
+ Operand unscaledSize = Local();
+
+ // Replace all uses with the unscaled size value.
+ // This must be done before the call is added, since it also is a use of the original size.
+ foreach (INode useOp in dest.UseOps)
+ {
+ for (int srcIndex = 0; srcIndex < useOp.SourcesCount; srcIndex++)
+ {
+ if (useOp.GetSource(srcIndex) == dest)
+ {
+ useOp.SetSource(srcIndex, unscaledSize);
+ }
+ }
+ }
+
+ Operand[] callArgs = new Operand[] { Const(functionId), dest, Const(samplerIndex) };
+
+ node.List.AddAfter(node, new Operation(Instruction.Call, 0, unscaledSize, callArgs));
+ }
+ }
+
+ return node;
+ }
+
+ private static LinkedListNode<INode> InsertCoordNormalization(
+ HelperFunctionManager hfm,
+ LinkedListNode<INode> node,
+ ResourceManager resourceManager,
+ IGpuAccessor gpuAccessor,
+ ShaderStage stage)
+ {
+ // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
+ // Without normalization, the coordinates are expected to the in the [0, W or H] range,
+ // and otherwise, it is expected to be in the [0, 1] range.
+ // We normalize by dividing the coords by the texture size.
+
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+ if (isBindless)
+ {
+ return node;
+ }
+
+ bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+
+ (int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
+
+ bool isCoordNormalized = gpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot);
+
+ if (isCoordNormalized || intCoords)
+ {
+ return node;
+ }
+
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+ int coordsCount = texOp.Type.GetDimensions();
+ int coordsIndex = isBindless || isIndexed ? 1 : 0;
+
+ int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
+
+ for (int index = 0; index < normCoordsCount; index++)
+ {
+ Operand coordSize = Local();
+
+ Operand[] texSizeSources;
+
+ if (isBindless || isIndexed)
+ {
+ texSizeSources = new Operand[] { texOp.GetSource(0), Const(0) };
+ }
+ else
+ {
+ texSizeSources = new Operand[] { Const(0) };
+ }
+
+ LinkedListNode<INode> textureSizeNode = node.List.AddBefore(node, new TextureOperation(
+ Instruction.TextureSize,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.Binding,
+ index,
+ new[] { coordSize },
+ texSizeSources));
+
+ resourceManager.SetUsageFlagsForTextureQuery(texOp.Binding, texOp.Type);
+
+ Operand source = texOp.GetSource(coordsIndex + index);
+
+ Operand coordNormalized = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, GenerateI2f(node, coordSize)));
+
+ texOp.SetSource(coordsIndex + index, coordNormalized);
+
+ InsertTextureSizeUnscale(hfm, textureSizeNode, resourceManager, stage);
+ }
+
+ return node;
+ }
+
+ private static LinkedListNode<INode> InsertCoordGatherBias(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
+ {
+ // The gather behavior when the coordinate sits right in the middle of two texels is not well defined.
+ // To ensure the correct texel is sampled, we add a small bias value to the coordinate.
+ // This value is calculated as the minimum value required to change the texel it will sample from,
+ // and is 0 if the host does not require the bias.
+
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+ bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+
+ int gatherBiasPrecision = gpuAccessor.QueryHostGatherBiasPrecision();
+
+ if (!isGather || gatherBiasPrecision == 0)
+ {
+ return node;
+ }
+
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+ int coordsCount = texOp.Type.GetDimensions();
+ int coordsIndex = isBindless || isIndexed ? 1 : 0;
+
+ int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
+
+ for (int index = 0; index < normCoordsCount; index++)
+ {
+ Operand coordSize = Local();
+ Operand scaledSize = Local();
+ Operand bias = Local();
+
+ Operand[] texSizeSources;
+
+ if (isBindless || isIndexed)
+ {
+ texSizeSources = new Operand[] { texOp.GetSource(0), Const(0) };
+ }
+ else
+ {
+ texSizeSources = new Operand[] { Const(0) };
+ }
+
+ node.List.AddBefore(node, new TextureOperation(
+ Instruction.TextureSize,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.Binding,
+ index,
+ new[] { coordSize },
+ texSizeSources));
+
+ node.List.AddBefore(node, new Operation(
+ Instruction.FP32 | Instruction.Multiply,
+ scaledSize,
+ GenerateI2f(node, coordSize),
+ ConstF((float)(1 << (gatherBiasPrecision + 1)))));
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, bias, ConstF(1f), scaledSize));
+
+ Operand source = texOp.GetSource(coordsIndex + index);
+
+ Operand coordBiased = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordBiased, source, bias));
+
+ texOp.SetSource(coordsIndex + index, coordBiased);
+ }
+
+ return node;
+ }
+
+ private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
+ {
+ // Non-constant texture offsets are not allowed (according to the spec),
+ // however some GPUs does support that.
+ // For GPUs where it is not supported, we can replace the instruction with the following:
+ // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
+ // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
+ // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
+ // For textureGatherOffset, we split the operation into up to 4 operations, one for each component
+ // that is accessed, where each textureGather operation has a different offset for each pixel.
+
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
+ bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
+
+ bool hasInvalidOffset = (hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset();
+
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+ if (!hasInvalidOffset)
+ {
+ return node;
+ }
+
+ bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+ bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
+ bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+ bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
+ bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+
+ bool isArray = (texOp.Type & SamplerType.Array) != 0;
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+ bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
+ bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
+
+ int coordsCount = texOp.Type.GetDimensions();
+
+ int offsetsCount;
+
+ if (hasOffsets)
+ {
+ offsetsCount = coordsCount * 4;
+ }
+ else if (hasOffset)
+ {
+ offsetsCount = coordsCount;
+ }
+ else
+ {
+ offsetsCount = 0;
+ }
+
+ Operand[] offsets = new Operand[offsetsCount];
+ Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];
+
+ int copyCount = 0;
+
+ if (isBindless || isIndexed)
+ {
+ copyCount++;
+ }
+
+ Operand[] lodSources = new Operand[copyCount + coordsCount];
+
+ for (int index = 0; index < lodSources.Length; index++)
+ {
+ lodSources[index] = texOp.GetSource(index);
+ }
+
+ copyCount += coordsCount;
+
+ if (isArray)
+ {
+ copyCount++;
+ }
+
+ if (isShadow)
+ {
+ copyCount++;
+ }
+
+ if (hasDerivatives)
+ {
+ copyCount += coordsCount * 2;
+ }
+
+ if (isMultisample)
+ {
+ copyCount++;
+ }
+ else if (hasLodLevel)
+ {
+ copyCount++;
+ }
+
+ int srcIndex = 0;
+ int dstIndex = 0;
+
+ for (int index = 0; index < copyCount; index++)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+
+ bool areAllOffsetsConstant = true;
+
+ for (int index = 0; index < offsetsCount; index++)
+ {
+ Operand offset = texOp.GetSource(srcIndex++);
+
+ areAllOffsetsConstant &= offset.Type == OperandType.Constant;
+
+ offsets[index] = offset;
+ }
+
+ hasInvalidOffset &= !areAllOffsetsConstant;
+
+ if (!hasInvalidOffset)
+ {
+ return node;
+ }
+
+ if (hasLodBias)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+
+ if (isGather && !isShadow)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+
+ int coordsIndex = isBindless || isIndexed ? 1 : 0;
+
+ int componentIndex = texOp.Index;
+
+ Operand[] dests = new Operand[texOp.DestsCount];
+
+ for (int i = 0; i < texOp.DestsCount; i++)
+ {
+ dests[i] = texOp.GetDest(i);
+ }
+
+ Operand bindlessHandle = isBindless || isIndexed ? sources[0] : null;
+
+ LinkedListNode<INode> oldNode = node;
+
+ if (isGather && !isShadow)
+ {
+ Operand[] newSources = new Operand[sources.Length];
+
+ sources.CopyTo(newSources, 0);
+
+ Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount);
+
+ int destIndex = 0;
+
+ for (int compIndex = 0; compIndex < 4; compIndex++)
+ {
+ if (((texOp.Index >> compIndex) & 1) == 0)
+ {
+ continue;
+ }
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ Operand offset = Local();
+
+ Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)];
+
+ node.List.AddBefore(node, new Operation(
+ Instruction.FP32 | Instruction.Divide,
+ offset,
+ GenerateI2f(node, intOffset),
+ GenerateI2f(node, texSizes[index])));
+
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
+
+ newSources[coordsIndex + index] = coordPlusOffset;
+ }
+
+ TextureOperation newTexOp = new(
+ Instruction.TextureSample,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
+ texOp.Binding,
+ 1,
+ new[] { dests[destIndex++] },
+ newSources);
+
+ node = node.List.AddBefore(node, newTexOp);
+ }
+ }
+ else
+ {
+ if (intCoords)
+ {
+ for (int index = 0; index < coordsCount; index++)
+ {
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index]));
+
+ sources[coordsIndex + index] = coordPlusOffset;
+ }
+ }
+ else
+ {
+ Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount);
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ Operand offset = Local();
+
+ Operand intOffset = offsets[index];
+
+ node.List.AddBefore(node, new Operation(
+ Instruction.FP32 | Instruction.Divide,
+ offset,
+ GenerateI2f(node, intOffset),
+ GenerateI2f(node, texSizes[index])));
+
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
+
+ sources[coordsIndex + index] = coordPlusOffset;
+ }
+ }
+
+ TextureOperation newTexOp = new(
+ Instruction.TextureSample,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
+ texOp.Binding,
+ componentIndex,
+ dests,
+ sources);
+
+ node = node.List.AddBefore(node, newTexOp);
+ }
+
+ node.List.Remove(oldNode);
+
+ for (int index = 0; index < texOp.SourcesCount; index++)
+ {
+ texOp.SetSource(index, null);
+ }
+
+ return node;
+ }
+
+ private static Operand[] InsertTextureLod(
+ LinkedListNode<INode> node,
+ TextureOperation texOp,
+ Operand[] lodSources,
+ Operand bindlessHandle,
+ int coordsCount)
+ {
+ Operand[] texSizes = new Operand[coordsCount];
+
+ Operand lod = Local();
+
+ node.List.AddBefore(node, new TextureOperation(
+ Instruction.Lod,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.Binding,
+ 0,
+ new[] { lod },
+ lodSources));
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ texSizes[index] = Local();
+
+ Operand[] texSizeSources;
+
+ if (bindlessHandle != null)
+ {
+ texSizeSources = new Operand[] { bindlessHandle, GenerateF2i(node, lod) };
+ }
+ else
+ {
+ texSizeSources = new Operand[] { GenerateF2i(node, lod) };
+ }
+
+ node.List.AddBefore(node, new TextureOperation(
+ Instruction.TextureSize,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.Binding,
+ index,
+ new[] { texSizes[index] },
+ texSizeSources));
+ }
+
+ return texSizes;
+ }
+
+ private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
+ {
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ // We can't query the format of a bindless texture,
+ // because the handle is unknown, it can have any format.
+ if (texOp.Flags.HasFlag(TextureFlags.Bindless))
+ {
+ return node;
+ }
+
+ (int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
+
+ TextureFormat format = gpuAccessor.QueryTextureFormat(handle, cbufSlot);
+
+ int maxPositive = format switch
+ {
+ TextureFormat.R8Snorm => sbyte.MaxValue,
+ TextureFormat.R8G8Snorm => sbyte.MaxValue,
+ TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
+ TextureFormat.R16Snorm => short.MaxValue,
+ TextureFormat.R16G16Snorm => short.MaxValue,
+ TextureFormat.R16G16B16A16Snorm => short.MaxValue,
+ _ => 0,
+ };
+
+ // The value being 0 means that the format is not a SNORM format,
+ // so there's nothing to do here.
+ if (maxPositive == 0)
+ {
+ return node;
+ }
+
+ // Do normalization. We assume SINT formats are being used
+ // as replacement for SNORM (which is not supported).
+ for (int i = 0; i < texOp.DestsCount; i++)
+ {
+ Operand dest = texOp.GetDest(i);
+
+ INode[] uses = dest.UseOps.ToArray();
+
+ Operation convOp = new(Instruction.ConvertS32ToFP32, Local(), dest);
+ Operation normOp = new(Instruction.FP32 | Instruction.Multiply, Local(), convOp.Dest, ConstF(1f / maxPositive));
+
+ node = node.List.AddAfter(node, convOp);
+ node = node.List.AddAfter(node, normOp);
+
+ foreach (INode useOp in uses)
+ {
+ if (useOp is not Operation op)
+ {
+ continue;
+ }
+
+ // Replace all uses of the texture pixel value with the normalized value.
+ for (int index = 0; index < op.SourcesCount; index++)
+ {
+ if (op.GetSource(index) == dest)
+ {
+ op.SetSource(index, normOp.Dest);
+ }
+ }
+ }
+ }
+
+ return node;
+ }
+
+ private static Operand GenerateI2f(LinkedListNode<INode> node, Operand value)
+ {
+ Operand res = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, res, value));
+
+ return res;
+ }
+
+ private static Operand GenerateF2i(LinkedListNode<INode> node, Operand value)
+ {
+ Operand res = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.ConvertFP32ToS32, res, value));
+
+ return res;
+ }
+
+ private static bool IsImageInstructionWithScale(Instruction inst)
+ {
+ // Currently, we don't support scaling images that are modified,
+ // so we only need to care about the load instruction.
+ return inst == Instruction.ImageLoad;
+ }
+
+ private static bool TypeSupportsScale(SamplerType type)
+ {
+ return (type & SamplerType.Mask) == SamplerType.Texture2D;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs
new file mode 100644
index 00000000..c3bbe7dd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs
@@ -0,0 +1,41 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Translation.Transforms
+{
+ static class TransformPasses
+ {
+ public static void RunPass(TransformContext context)
+ {
+ RunPass<DrawParametersReplace>(context);
+ RunPass<ForcePreciseEnable>(context);
+ RunPass<VectorComponentSelect>(context);
+ RunPass<TexturePass>(context);
+ RunPass<SharedStoreSmallIntCas>(context);
+ RunPass<SharedAtomicSignedCas>(context);
+ }
+
+ private static void RunPass<T>(TransformContext context) where T : ITransformPass
+ {
+ if (!T.IsEnabled(context.GpuAccessor, context.Stage, context.TargetLanguage, context.UsedFeatures))
+ {
+ return;
+ }
+
+ for (int blkIndex = 0; blkIndex < context.Blocks.Length; blkIndex++)
+ {
+ BasicBlock block = context.Blocks[blkIndex];
+
+ for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+ {
+ if (node.Value is not Operation)
+ {
+ continue;
+ }
+
+ node = T.RunPass(context, node);
+ }
+ }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/VectorComponentSelect.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/VectorComponentSelect.cs
new file mode 100644
index 00000000..e55f4355
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/VectorComponentSelect.cs
@@ -0,0 +1,96 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Transforms
+{
+ class VectorComponentSelect : ITransformPass
+ {
+ public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
+ {
+ return gpuAccessor.QueryHostHasVectorIndexingBug();
+ }
+
+ public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
+ {
+ Operation operation = (Operation)node.Value;
+
+ if (operation.Inst != Instruction.Load ||
+ operation.StorageKind != StorageKind.ConstantBuffer ||
+ operation.SourcesCount < 3)
+ {
+ return node;
+ }
+
+ Operand bindingIndex = operation.GetSource(0);
+ Operand fieldIndex = operation.GetSource(1);
+ Operand elemIndex = operation.GetSource(operation.SourcesCount - 1);
+
+ if (bindingIndex.Type != OperandType.Constant ||
+ fieldIndex.Type != OperandType.Constant ||
+ elemIndex.Type == OperandType.Constant)
+ {
+ return node;
+ }
+
+ BufferDefinition buffer = context.ResourceManager.Properties.ConstantBuffers[bindingIndex.Value];
+ StructureField field = buffer.Type.Fields[fieldIndex.Value];
+
+ int elemCount = (field.Type & AggregateType.ElementCountMask) switch
+ {
+ AggregateType.Vector2 => 2,
+ AggregateType.Vector3 => 3,
+ AggregateType.Vector4 => 4,
+ _ => 1
+ };
+
+ if (elemCount == 1)
+ {
+ return node;
+ }
+
+ Operand result = null;
+
+ for (int i = 0; i < elemCount; i++)
+ {
+ Operand value = Local();
+ Operand[] inputs = new Operand[operation.SourcesCount];
+
+ for (int srcIndex = 0; srcIndex < inputs.Length - 1; srcIndex++)
+ {
+ inputs[srcIndex] = operation.GetSource(srcIndex);
+ }
+
+ inputs[^1] = Const(i);
+
+ Operation loadOp = new(Instruction.Load, StorageKind.ConstantBuffer, value, inputs);
+
+ node.List.AddBefore(node, loadOp);
+
+ if (i == 0)
+ {
+ result = value;
+ }
+ else
+ {
+ Operand isCurrentIndex = Local();
+ Operand selection = Local();
+
+ Operation compareOp = new(Instruction.CompareEqual, isCurrentIndex, new Operand[] { elemIndex, Const(i) });
+ Operation selectOp = new(Instruction.ConditionalSelect, selection, new Operand[] { isCurrentIndex, value, result });
+
+ node.List.AddBefore(node, compareOp);
+ node.List.AddBefore(node, selectOp);
+
+ result = selection;
+ }
+ }
+
+ operation.TurnIntoCopy(result);
+
+ return node;
+ }
+ }
+}