diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Translation')
25 files changed, 1541 insertions, 1280 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Translation/AttributeUsage.cs b/src/Ryujinx.Graphics.Shader/Translation/AttributeUsage.cs new file mode 100644 index 00000000..9dab9fdf --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/AttributeUsage.cs @@ -0,0 +1,168 @@ +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace Ryujinx.Graphics.Shader.Translation +{ + class AttributeUsage + { + public bool NextUsesFixedFuncAttributes { get; private set; } + public int UsedInputAttributes { get; private set; } + public int UsedOutputAttributes { get; private set; } + public HashSet<int> UsedInputAttributesPerPatch { get; } + public HashSet<int> UsedOutputAttributesPerPatch { get; } + public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; } + public int PassthroughAttributes { get; private set; } + private int _nextUsedInputAttributes; + private int _thisUsedInputAttributes; + private Dictionary<int, int> _perPatchAttributeLocations; + private readonly IGpuAccessor _gpuAccessor; + + public UInt128 NextInputAttributesComponents { get; private set; } + public UInt128 ThisInputAttributesComponents { get; private set; } + + public AttributeUsage(IGpuAccessor gpuAccessor) + { + _gpuAccessor = gpuAccessor; + + UsedInputAttributesPerPatch = new(); + UsedOutputAttributesPerPatch = new(); + } + + public void SetInputUserAttribute(int index, int component) + { + int mask = 1 << index; + + UsedInputAttributes |= mask; + _thisUsedInputAttributes |= mask; + ThisInputAttributesComponents |= UInt128.One << (index * 4 + component); + } + + public void SetInputUserAttributePerPatch(int index) + { + UsedInputAttributesPerPatch.Add(index); + } + + public void SetOutputUserAttribute(int index) + { + UsedOutputAttributes |= 1 << index; + } + + public void SetOutputUserAttributePerPatch(int index) + { + UsedOutputAttributesPerPatch.Add(index); + } + + public void MergeFromtNextStage(bool gpPassthrough, bool nextUsesFixedFunctionAttributes, 
AttributeUsage nextStage) + { + NextInputAttributesComponents = nextStage.ThisInputAttributesComponents; + NextUsedInputAttributesPerPatch = nextStage.UsedInputAttributesPerPatch; + NextUsesFixedFuncAttributes = nextUsesFixedFunctionAttributes; + MergeOutputUserAttributes(gpPassthrough, nextStage.UsedInputAttributes, nextStage.UsedInputAttributesPerPatch); + + if (UsedOutputAttributesPerPatch.Count != 0) + { + // Regular and per-patch input/output locations can't overlap, + // so we must assign on our location using unused regular input/output locations. + + Dictionary<int, int> locationsMap = new(); + + int freeMask = ~UsedOutputAttributes; + + foreach (int attr in UsedOutputAttributesPerPatch) + { + int location = BitOperations.TrailingZeroCount(freeMask); + if (location == 32) + { + _gpuAccessor.Log($"No enough free locations for patch input/output 0x{attr:X}."); + break; + } + + locationsMap.Add(attr, location); + freeMask &= ~(1 << location); + } + + // Both stages must agree on the locations, so use the same "map" for both. + _perPatchAttributeLocations = locationsMap; + nextStage._perPatchAttributeLocations = locationsMap; + } + } + + private void MergeOutputUserAttributes(bool gpPassthrough, int mask, IEnumerable<int> perPatch) + { + _nextUsedInputAttributes = mask; + + if (gpPassthrough) + { + PassthroughAttributes = mask & ~UsedOutputAttributes; + } + else + { + UsedOutputAttributes |= mask; + UsedOutputAttributesPerPatch.UnionWith(perPatch); + } + } + + public int GetPerPatchAttributeLocation(int index) + { + if (_perPatchAttributeLocations == null || !_perPatchAttributeLocations.TryGetValue(index, out int location)) + { + return index; + } + + return location; + } + + public bool IsUsedOutputAttribute(int attr) + { + // The check for fixed function attributes on the next stage is conservative, + // returning false if the output is just not used by the next stage is also valid. 
+ if (NextUsesFixedFuncAttributes && + attr >= AttributeConsts.UserAttributeBase && + attr < AttributeConsts.UserAttributeEnd) + { + int index = (attr - AttributeConsts.UserAttributeBase) >> 4; + return (_nextUsedInputAttributes & (1 << index)) != 0; + } + + return true; + } + + public int GetFreeUserAttribute(bool isOutput, int index) + { + int useMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes; + int bit = -1; + + while (useMask != -1) + { + bit = BitOperations.TrailingZeroCount(~useMask); + + if (bit == 32) + { + bit = -1; + break; + } + else if (index < 1) + { + break; + } + + useMask |= 1 << bit; + index--; + } + + return bit; + } + + public void SetAllInputUserAttributes() + { + UsedInputAttributes |= Constants.AllAttributesMask; + ThisInputAttributesComponents |= ~UInt128.Zero >> (128 - Constants.MaxAttributes * 4); + } + + public void SetAllOutputUserAttributes() + { + UsedOutputAttributes |= Constants.AllAttributesMask; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs index 614b275b..43263dd4 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs @@ -11,7 +11,8 @@ namespace Ryujinx.Graphics.Shader.Translation class EmitterContext { public DecodedProgram Program { get; } - public ShaderConfig Config { get; } + public TranslatorContext TranslatorContext { get; } + public ResourceManager ResourceManager { get; } public bool IsNonMain { get; } @@ -54,10 +55,15 @@ namespace Ryujinx.Graphics.Shader.Translation _labels = new Dictionary<ulong, BlockLabel>(); } - public EmitterContext(DecodedProgram program, ShaderConfig config, bool isNonMain) : this() + public EmitterContext( + TranslatorContext translatorContext, + ResourceManager resourceManager, + DecodedProgram program, + bool isNonMain) : this() { + TranslatorContext = translatorContext; + ResourceManager = 
resourceManager; Program = program; - Config = config; IsNonMain = isNonMain; EmitStart(); @@ -65,12 +71,12 @@ namespace Ryujinx.Graphics.Shader.Translation private void EmitStart() { - if (Config.Stage == ShaderStage.Vertex && - Config.Options.TargetApi == TargetApi.Vulkan && - (Config.Options.Flags & TranslationFlags.VertexA) == 0) + if (TranslatorContext.Definitions.Stage == ShaderStage.Vertex && + TranslatorContext.Options.TargetApi == TargetApi.Vulkan && + (TranslatorContext.Options.Flags & TranslationFlags.VertexA) == 0) { // Vulkan requires the point size to be always written on the shader if the primitive topology is points. - this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(Config.GpuAccessor.QueryPointSize())); + this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(TranslatorContext.Definitions.PointSize)); } } @@ -115,49 +121,6 @@ namespace Ryujinx.Graphics.Shader.Translation _operations.Add(operation); } - public void FlagAttributeRead(int attribute) - { - if (Config.Stage == ShaderStage.Vertex && attribute == AttributeConsts.InstanceId) - { - Config.SetUsedFeature(FeatureFlags.InstanceId); - } - else if (Config.Stage == ShaderStage.Fragment) - { - switch (attribute) - { - case AttributeConsts.PositionX: - case AttributeConsts.PositionY: - Config.SetUsedFeature(FeatureFlags.FragCoordXY); - break; - } - } - } - - public void FlagAttributeWritten(int attribute) - { - if (Config.Stage == ShaderStage.Vertex) - { - switch (attribute) - { - case AttributeConsts.ClipDistance0: - case AttributeConsts.ClipDistance1: - case AttributeConsts.ClipDistance2: - case AttributeConsts.ClipDistance3: - case AttributeConsts.ClipDistance4: - case AttributeConsts.ClipDistance5: - case AttributeConsts.ClipDistance6: - case AttributeConsts.ClipDistance7: - Config.SetClipDistanceWritten((attribute - AttributeConsts.ClipDistance0) / 4); - break; - } - } - - if (Config.Stage != ShaderStage.Fragment && attribute == AttributeConsts.Layer) - { - 
Config.SetUsedFeature(FeatureFlags.RtLayer); - } - } - public void MarkLabel(Operand label) { Add(Instruction.MarkLabel, label); @@ -203,14 +166,14 @@ namespace Ryujinx.Graphics.Shader.Translation public void PrepareForVertexReturn() { - if (!Config.GpuAccessor.QueryHostSupportsTransformFeedback() && Config.GpuAccessor.QueryTransformFeedbackEnabled()) + if (!TranslatorContext.GpuAccessor.QueryHostSupportsTransformFeedback() && TranslatorContext.GpuAccessor.QueryTransformFeedbackEnabled()) { Operand vertexCount = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(1)); for (int tfbIndex = 0; tfbIndex < Constants.TfeBuffersCount; tfbIndex++) { - var locations = Config.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex); - var stride = Config.GpuAccessor.QueryTransformFeedbackStride(tfbIndex); + var locations = TranslatorContext.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex); + var stride = TranslatorContext.GpuAccessor.QueryTransformFeedbackStride(tfbIndex); Operand baseOffset = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(0), Const(tfbIndex)); Operand baseVertex = this.Load(StorageKind.Input, IoVariable.BaseVertex); @@ -242,7 +205,7 @@ namespace Ryujinx.Graphics.Shader.Translation } } - if (Config.GpuAccessor.QueryViewportTransformDisable()) + if (TranslatorContext.Definitions.ViewportTransformDisable) { Operand x = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0)); Operand y = this.Load(StorageKind.Output, IoVariable.Position, null, Const(1)); @@ -254,7 +217,7 @@ namespace Ryujinx.Graphics.Shader.Translation this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), this.FPFusedMultiplyAdd(y, yScale, negativeOne)); } - if (Config.GpuAccessor.QueryTransformDepthMinusOneToOne() && !Config.GpuAccessor.QueryHostSupportsDepthClipControl()) + if (TranslatorContext.Definitions.DepthMode && !TranslatorContext.GpuAccessor.QueryHostSupportsDepthClipControl()) { Operand z = 
this.Load(StorageKind.Output, IoVariable.Position, null, Const(2)); Operand w = this.Load(StorageKind.Output, IoVariable.Position, null, Const(3)); @@ -263,12 +226,10 @@ namespace Ryujinx.Graphics.Shader.Translation this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(z, ConstF(0.5f), halfW)); } - if (Config.Stage != ShaderStage.Geometry && Config.HasLayerInputAttribute) + if (TranslatorContext.Definitions.Stage != ShaderStage.Geometry && TranslatorContext.HasLayerInputAttribute) { - Config.SetUsedFeature(FeatureFlags.RtLayer); - - int attrVecIndex = Config.GpLayerInputAttribute >> 2; - int attrComponentIndex = Config.GpLayerInputAttribute & 3; + int attrVecIndex = TranslatorContext.GpLayerInputAttribute >> 2; + int attrComponentIndex = TranslatorContext.GpLayerInputAttribute & 3; Operand layer = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(attrVecIndex), Const(attrComponentIndex)); @@ -278,7 +239,7 @@ namespace Ryujinx.Graphics.Shader.Translation public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal) { - if (Config.GpuAccessor.QueryViewportTransformDisable()) + if (TranslatorContext.Definitions.ViewportTransformDisable) { oldXLocal = Local(); this.Copy(oldXLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(0))); @@ -291,7 +252,7 @@ namespace Ryujinx.Graphics.Shader.Translation oldYLocal = null; } - if (Config.GpuAccessor.QueryTransformDepthMinusOneToOne() && !Config.GpuAccessor.QueryHostSupportsDepthClipControl()) + if (TranslatorContext.Definitions.DepthMode && !TranslatorContext.GpuAccessor.QueryHostSupportsDepthClipControl()) { oldZLocal = Local(); this.Copy(oldZLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(2))); @@ -311,13 +272,13 @@ namespace Ryujinx.Graphics.Shader.Translation return true; } - if (Config.LastInVertexPipeline && - (Config.Stage == ShaderStage.Vertex || Config.Stage == 
ShaderStage.TessellationEvaluation) && - (Config.Options.Flags & TranslationFlags.VertexA) == 0) + if (TranslatorContext.Definitions.LastInVertexPipeline && + (TranslatorContext.Definitions.Stage == ShaderStage.Vertex || TranslatorContext.Definitions.Stage == ShaderStage.TessellationEvaluation) && + (TranslatorContext.Options.Flags & TranslationFlags.VertexA) == 0) { PrepareForVertexReturn(); } - else if (Config.Stage == ShaderStage.Geometry) + else if (TranslatorContext.Definitions.Stage == ShaderStage.Geometry) { void WritePositionOutput(int primIndex) { @@ -345,20 +306,19 @@ namespace Ryujinx.Graphics.Shader.Translation this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(3), w); } - if (Config.GpPassthrough && !Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) + if (TranslatorContext.Definitions.GpPassthrough && !TranslatorContext.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) { - int inputVertices = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices(); + int inputVertices = TranslatorContext.Definitions.InputTopology.ToInputVertices(); for (int primIndex = 0; primIndex < inputVertices; primIndex++) { WritePositionOutput(primIndex); - int passthroughAttributes = Config.PassthroughAttributes; + int passthroughAttributes = TranslatorContext.AttributeUsage.PassthroughAttributes; while (passthroughAttributes != 0) { int index = BitOperations.TrailingZeroCount(passthroughAttributes); WriteUserDefinedOutput(index, primIndex); - Config.SetOutputUserAttribute(index); passthroughAttributes &= ~(1 << index); } @@ -368,20 +328,20 @@ namespace Ryujinx.Graphics.Shader.Translation this.EndPrimitive(); } } - else if (Config.Stage == ShaderStage.Fragment) + else if (TranslatorContext.Definitions.Stage == ShaderStage.Fragment) { GenerateAlphaToCoverageDitherDiscard(); - bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat(); + bool supportsBgra = 
TranslatorContext.GpuAccessor.QueryHostSupportsBgraFormat(); - if (Config.OmapDepth) + if (TranslatorContext.Definitions.OmapDepth) { - Operand src = Register(Config.GetDepthRegister(), RegisterType.Gpr); + Operand src = Register(TranslatorContext.GetDepthRegister(), RegisterType.Gpr); this.Store(StorageKind.Output, IoVariable.FragmentOutputDepth, null, src); } - AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare(); + AlphaTestOp alphaTestOp = TranslatorContext.Definitions.AlphaTestCompare; if (alphaTestOp != AlphaTestOp.Always) { @@ -389,7 +349,7 @@ namespace Ryujinx.Graphics.Shader.Translation { this.Discard(); } - else if ((Config.OmapTargets & 8) != 0) + else if ((TranslatorContext.Definitions.OmapTargets & 8) != 0) { Instruction comparator = alphaTestOp switch { @@ -405,7 +365,7 @@ namespace Ryujinx.Graphics.Shader.Translation Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\"."); Operand alpha = Register(3, RegisterType.Gpr); - Operand alphaRef = ConstF(Config.GpuAccessor.QueryAlphaTestReference()); + Operand alphaRef = ConstF(TranslatorContext.Definitions.AlphaTestReference); Operand alphaPass = Add(Instruction.FP32 | comparator, Local(), alpha, alphaRef); Operand alphaPassLabel = Label(); @@ -427,7 +387,7 @@ namespace Ryujinx.Graphics.Shader.Translation { for (int component = 0; component < 4; component++) { - bool componentEnabled = (Config.OmapTargets & (1 << (rtIndex * 4 + component))) != 0; + bool componentEnabled = (TranslatorContext.Definitions.OmapTargets & (1 << (rtIndex * 4 + component))) != 0; if (!componentEnabled) { continue; @@ -460,10 +420,9 @@ namespace Ryujinx.Graphics.Shader.Translation } } - bool targetEnabled = (Config.OmapTargets & (0xf << (rtIndex * 4))) != 0; + bool targetEnabled = (TranslatorContext.Definitions.OmapTargets & (0xf << (rtIndex * 4))) != 0; if (targetEnabled) { - Config.SetOutputUserAttribute(rtIndex); regIndexBase += 4; } } @@ -475,7 +434,7 @@ namespace 
Ryujinx.Graphics.Shader.Translation private void GenerateAlphaToCoverageDitherDiscard() { // If the feature is disabled, or alpha is not written, then we're done. - if (!Config.GpuAccessor.QueryAlphaToCoverageDitherEnable() || (Config.OmapTargets & 8) == 0) + if (!TranslatorContext.Definitions.AlphaToCoverageDitherEnable || (TranslatorContext.Definitions.OmapTargets & 8) == 0) { return; } diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs index 9d4d032a..5b7226ac 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -12,15 +12,12 @@ namespace Ryujinx.Graphics.Shader.Translation None = 0, // Affected by resolution scaling. - IntegerSampling = 1 << 0, FragCoordXY = 1 << 1, Bindless = 1 << 2, InstanceId = 1 << 3, DrawParameters = 1 << 4, RtLayer = 1 << 5, - IaIndexing = 1 << 7, - OaIndexing = 1 << 8, FixedFuncAttr = 1 << 9, LocalMemory = 1 << 10, SharedMemory = 1 << 11, diff --git a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs new file mode 100644 index 00000000..2523272b --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs @@ -0,0 +1,34 @@ +namespace Ryujinx.Graphics.Shader.Translation +{ + class HostCapabilities + { + public readonly bool ReducedPrecision; + public readonly bool SupportsFragmentShaderInterlock; + public readonly bool SupportsFragmentShaderOrderingIntel; + public readonly bool SupportsGeometryShaderPassthrough; + public readonly bool SupportsShaderBallot; + public readonly bool SupportsShaderBarrierDivergence; + public readonly bool SupportsTextureShadowLod; + public readonly bool SupportsViewportMask; + + public HostCapabilities( + bool reducedPrecision, + bool supportsFragmentShaderInterlock, + bool supportsFragmentShaderOrderingIntel, + bool supportsGeometryShaderPassthrough, + bool 
supportsShaderBallot, + bool supportsShaderBarrierDivergence, + bool supportsTextureShadowLod, + bool supportsViewportMask) + { + ReducedPrecision = reducedPrecision; + SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock; + SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel; + SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough; + SupportsShaderBallot = supportsShaderBallot; + SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; + SupportsTextureShadowLod = supportsTextureShadowLod; + SupportsViewportMask = supportsViewportMask; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs index bf087aff..43d98d3c 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs @@ -1,12 +1,13 @@ using Ryujinx.Graphics.Shader.Instructions; using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; using System.Collections.Generic; namespace Ryujinx.Graphics.Shader.Translation.Optimizations { class BindlessElimination { - public static void RunPass(BasicBlock block, ShaderConfig config) + public static void RunPass(BasicBlock block, ResourceManager resourceManager, IGpuAccessor gpuAccessor) { // We can turn a bindless into regular access by recognizing the pattern // produced by the compiler for separate texture and sampler. 
@@ -43,7 +44,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (bindlessHandle.Type == OperandType.ConstantBuffer) { - SetHandle(config, texOp, bindlessHandle.GetCbufOffset(), bindlessHandle.GetCbufSlot(), rewriteSamplerType, isImage: false); + SetHandle( + resourceManager, + gpuAccessor, + texOp, + bindlessHandle.GetCbufOffset(), + bindlessHandle.GetCbufSlot(), + rewriteSamplerType, + isImage: false); + continue; } @@ -140,7 +149,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (handleType == TextureHandleType.SeparateConstantSamplerHandle) { SetHandle( - config, + resourceManager, + gpuAccessor, texOp, TextureHandle.PackOffsets(src0.GetCbufOffset(), ((src1.Value >> 20) & 0xfff), handleType), TextureHandle.PackSlots(src0.GetCbufSlot(), 0), @@ -150,7 +160,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations else if (src1.Type == OperandType.ConstantBuffer) { SetHandle( - config, + resourceManager, + gpuAccessor, texOp, TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType), TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()), @@ -173,17 +184,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { if (texOp.Inst == Instruction.ImageAtomic) { - texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot); + texOp.Format = ShaderProperties.GetTextureFormatAtomic(gpuAccessor, cbufOffset, cbufSlot); } else { - texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot); + texOp.Format = ShaderProperties.GetTextureFormat(gpuAccessor, cbufOffset, cbufSlot); } } bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer; - SetHandle(config, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true); + SetHandle(resourceManager, gpuAccessor, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true); } } } @@ -220,11 +231,18 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations return null; } - private static void SetHandle(ShaderConfig 
config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType, bool isImage) + private static void SetHandle( + ResourceManager resourceManager, + IGpuAccessor gpuAccessor, + TextureOperation texOp, + int cbufOffset, + int cbufSlot, + bool rewriteSamplerType, + bool isImage) { if (rewriteSamplerType) { - SamplerType newType = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot); + SamplerType newType = gpuAccessor.QuerySamplerType(cbufOffset, cbufSlot); if (texOp.Inst.IsTextureQuery()) { @@ -253,7 +271,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } } - int binding = config.ResourceManager.GetTextureOrImageBinding( + int binding = resourceManager.GetTextureOrImageBinding( texOp.Inst, texOp.Type, texOp.Format, diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs index 4b1bf76e..2bd31fe1 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs @@ -9,7 +9,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { private const int NvnTextureBufferIndex = 2; - public static void RunPass(BasicBlock block, ShaderConfig config) + public static void RunPass(BasicBlock block, ResourceManager resourceManager) { // We can turn a bindless texture access into a indexed access, // as long the following conditions are true: @@ -44,7 +44,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations Operand ldcSrc0 = handleAsgOp.GetSource(0); if (ldcSrc0.Type != OperandType.Constant || - !config.ResourceManager.TryGetConstantBufferSlot(ldcSrc0.Value, out int src0CbufSlot) || + !resourceManager.TryGetConstantBufferSlot(ldcSrc0.Value, out int src0CbufSlot) || src0CbufSlot != NvnTextureBufferIndex) { continue; @@ -88,7 +88,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations continue; } - 
TurnIntoIndexed(config, texOp, addSrc1.Value / 4); + TurnIntoIndexed(resourceManager, texOp, addSrc1.Value / 4); Operand index = Local(); @@ -102,9 +102,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } } - private static void TurnIntoIndexed(ShaderConfig config, TextureOperation texOp, int handle) + private static void TurnIntoIndexed(ResourceManager resourceManager, TextureOperation texOp, int handle) { - int binding = config.ResourceManager.GetTextureOrImageBinding( + int binding = resourceManager.GetTextureOrImageBinding( texOp.Inst, texOp.Type | SamplerType.Indexed, texOp.Format, diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs index 0cca0ac6..3941303b 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs @@ -7,7 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class ConstantFolding { - public static void RunPass(ShaderConfig config, Operation operation) + public static void RunPass(ResourceManager resourceManager, Operation operation) { if (!AreAllSourcesConstant(operation)) { @@ -158,7 +158,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations int binding = operation.GetSource(0).Value; int fieldIndex = operation.GetSource(1).Value; - if (config.ResourceManager.TryGetConstantBufferSlot(binding, out int cbufSlot) && fieldIndex == 0) + if (resourceManager.TryGetConstantBufferSlot(binding, out int cbufSlot) && fieldIndex == 0) { int vecIndex = operation.GetSource(2).Value; int elemIndex = operation.GetSource(3).Value; diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index 2433aeb2..0f043f77 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs 
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -205,7 +205,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } } - public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config) + public static void RunPass( + HelperFunctionManager hfm, + BasicBlock[] blocks, + ResourceManager resourceManager, + IGpuAccessor gpuAccessor, + TargetLanguage targetLanguage) { GtsContext gtsContext = new(hfm); @@ -220,14 +225,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (IsGlobalMemory(operation.StorageKind)) { - LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage(gtsContext, config, block, node); + LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage( + gtsContext, + resourceManager, + gpuAccessor, + targetLanguage, + block, + node); if (nextNode == null) { // The returned value being null means that the global memory replacement failed, // so we just make loads read 0 and stores do nothing. 
- config.GpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\"."); + gpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\"."); if (operation.Dest != null) { @@ -286,7 +297,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations private static LinkedListNode<INode> ReplaceGlobalMemoryWithStorage( GtsContext gtsContext, - ShaderConfig config, + ResourceManager resourceManager, + IGpuAccessor gpuAccessor, + TargetLanguage targetLanguage, BasicBlock block, LinkedListNode<INode> node) { @@ -303,7 +316,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations Operand offset = result.Offset; - bool storageUnaligned = config.GpuAccessor.QueryHasUnalignedStorageBuffer(); + bool storageUnaligned = gpuAccessor.QueryHasUnalignedStorageBuffer(); if (storageUnaligned) { @@ -312,7 +325,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations Operand baseAddressMasked = Local(); Operand hostOffset = Local(); - int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment(); + int alignment = gpuAccessor.QueryHostStorageBufferOffsetAlignment(); Operation maskOp = new(Instruction.BitwiseAnd, baseAddressMasked, baseAddress, Const(-alignment)); Operation subOp = new(Instruction.Subtract, hostOffset, globalAddress, baseAddressMasked); @@ -333,13 +346,19 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations offset = newOffset; } - if (CanUseInlineStorageOp(operation, config.Options.TargetLanguage)) + if (CanUseInlineStorageOp(operation, targetLanguage)) { - return GenerateInlineStorageOp(config, node, operation, offset, result); + return GenerateInlineStorageOp(resourceManager, node, operation, offset, result); } else { - if (!TryGenerateSingleTargetStorageOp(gtsContext, config, operation, result, out int functionId)) + if (!TryGenerateSingleTargetStorageOp( + gtsContext, + resourceManager, + targetLanguage, + operation, + result, + out int functionId)) 
{ return null; } @@ -354,7 +373,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations // the base address might be stored. // Generate a helper function that will check all possible storage buffers and use the right one. - if (!TryGenerateMultiTargetStorageOp(gtsContext, config, block, operation, out int functionId)) + if (!TryGenerateMultiTargetStorageOp( + gtsContext, + resourceManager, + gpuAccessor, + targetLanguage, + block, + operation, + out int functionId)) { return null; } @@ -375,14 +401,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } private static LinkedListNode<INode> GenerateInlineStorageOp( - ShaderConfig config, + ResourceManager resourceManager, LinkedListNode<INode> node, Operation operation, Operand offset, SearchResult result) { bool isStore = operation.Inst == Instruction.Store || operation.Inst.IsAtomic(); - if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) + if (!resourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) { return null; } @@ -474,7 +500,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations private static bool TryGenerateSingleTargetStorageOp( GtsContext gtsContext, - ShaderConfig config, + ResourceManager resourceManager, + TargetLanguage targetLanguage, Operation operation, SearchResult result, out int functionId) @@ -514,7 +541,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } if (!TryGenerateStorageOp( - config, + resourceManager, + targetLanguage, context, operation.Inst, operation.StorageKind, @@ -555,7 +583,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations private static bool TryGenerateMultiTargetStorageOp( GtsContext gtsContext, - ShaderConfig config, + ResourceManager resourceManager, + IGpuAccessor gpuAccessor, + TargetLanguage targetLanguage, BasicBlock block, Operation operation, out int functionId) @@ -624,7 +654,7 @@ namespace 
Ryujinx.Graphics.Shader.Translation.Optimizations if (targetCbs.Count == 0) { - config.GpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\"."); + gpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\"."); } if (gtsContext.TryGetFunctionId(operation, isMultiTarget: true, targetCbs, out functionId)) @@ -685,13 +715,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations SearchResult result = new(sbCbSlot, sbCbOffset); - int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment(); + int alignment = gpuAccessor.QueryHostStorageBufferOffsetAlignment(); Operand baseAddressMasked = context.BitwiseAnd(baseAddrLow, Const(-alignment)); Operand hostOffset = context.ISubtract(globalAddressLow, baseAddressMasked); if (!TryGenerateStorageOp( - config, + resourceManager, + targetLanguage, context, operation.Inst, operation.StorageKind, @@ -781,7 +812,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } private static bool TryGenerateStorageOp( - ShaderConfig config, + ResourceManager resourceManager, + TargetLanguage targetLanguage, EmitterContext context, Instruction inst, StorageKind storageKind, @@ -794,7 +826,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations resultValue = null; bool isStore = inst.IsAtomic() || inst == Instruction.Store; - if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) + if (!resourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) { return false; } @@ -820,7 +852,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations resultValue = context.AtomicCompareAndSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, compare, value); break; case Instruction.AtomicMaxS32: - if (config.Options.TargetLanguage == TargetLanguage.Spirv) + if (targetLanguage == TargetLanguage.Spirv) { resultValue = 
context.AtomicMaxS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); } @@ -836,7 +868,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations resultValue = context.AtomicMaxU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); break; case Instruction.AtomicMinS32: - if (config.Options.TargetLanguage == TargetLanguage.Spirv) + if (targetLanguage == TargetLanguage.Spirv) { resultValue = context.AtomicMinS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); } diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs index e7805027..17427a5f 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs @@ -7,40 +7,40 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class Optimizer { - public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config) + public static void RunPass(TransformContext context) { - RunOptimizationPasses(blocks, config); + RunOptimizationPasses(context.Blocks, context.ResourceManager); // TODO: Some of those are not optimizations and shouldn't be here. - GlobalToStorage.RunPass(hfm, blocks, config); + GlobalToStorage.RunPass(context.Hfm, context.Blocks, context.ResourceManager, context.GpuAccessor, context.TargetLanguage); - bool hostSupportsShaderFloat64 = config.GpuAccessor.QueryHostSupportsShaderFloat64(); + bool hostSupportsShaderFloat64 = context.GpuAccessor.QueryHostSupportsShaderFloat64(); // Those passes are looking for specific patterns and only needs to run once. 
- for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) + for (int blkIndex = 0; blkIndex < context.Blocks.Length; blkIndex++) { - BindlessToIndexed.RunPass(blocks[blkIndex], config); - BindlessElimination.RunPass(blocks[blkIndex], config); + BindlessToIndexed.RunPass(context.Blocks[blkIndex], context.ResourceManager); + BindlessElimination.RunPass(context.Blocks[blkIndex], context.ResourceManager, context.GpuAccessor); // FragmentCoord only exists on fragment shaders, so we don't need to check other stages. - if (config.Stage == ShaderStage.Fragment) + if (context.Stage == ShaderStage.Fragment) { - EliminateMultiplyByFragmentCoordW(blocks[blkIndex]); + EliminateMultiplyByFragmentCoordW(context.Blocks[blkIndex]); } // If the host does not support double operations, we need to turn them into float operations. if (!hostSupportsShaderFloat64) { - DoubleToFloat.RunPass(hfm, blocks[blkIndex]); + DoubleToFloat.RunPass(context.Hfm, context.Blocks[blkIndex]); } } // Run optimizations one last time to remove any code that is now optimizable after above passes. 
- RunOptimizationPasses(blocks, config); + RunOptimizationPasses(context.Blocks, context.ResourceManager); } - private static void RunOptimizationPasses(BasicBlock[] blocks, ShaderConfig config) + private static void RunOptimizationPasses(BasicBlock[] blocks, ResourceManager resourceManager) { bool modified; @@ -79,7 +79,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations continue; } - ConstantFolding.RunPass(config, operation); + ConstantFolding.RunPass(resourceManager, operation); Simplification.RunPass(operation); if (DestIsLocalVar(operation)) diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index 555acd35..d07d8dce 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -50,10 +50,10 @@ namespace Ryujinx.Graphics.Shader.Translation public ShaderProperties Properties { get; } - public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties) + public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor) { _gpuAccessor = gpuAccessor; - Properties = properties; + Properties = new(); _stage = stage; _stagePrefix = GetShaderStagePrefix(stage); @@ -62,15 +62,15 @@ namespace Ryujinx.Graphics.Shader.Translation _cbSlotToBindingMap.AsSpan().Fill(-1); _sbSlotToBindingMap.AsSpan().Fill(-1); - _sbSlots = new Dictionary<int, int>(); - _sbSlotsReverse = new Dictionary<int, int>(); + _sbSlots = new(); + _sbSlotsReverse = new(); - _usedConstantBufferBindings = new HashSet<int>(); + _usedConstantBufferBindings = new(); - _usedTextures = new Dictionary<TextureInfo, TextureMeta>(); - _usedImages = new Dictionary<TextureInfo, TextureMeta>(); + _usedTextures = new(); + _usedImages = new(); - properties.AddOrUpdateConstantBuffer(0, new BufferDefinition(BufferLayout.Std140, 0, 0, "support_buffer", SupportBuffer.GetStructureType())); + 
Properties.AddOrUpdateConstantBuffer(new(BufferLayout.Std140, 0, SupportBuffer.Binding, "support_buffer", SupportBuffer.GetStructureType())); LocalMemoryId = -1; SharedMemoryId = -1; @@ -312,11 +312,11 @@ namespace Ryujinx.Graphics.Shader.Translation if (isImage) { - Properties.AddOrUpdateImage(binding, definition); + Properties.AddOrUpdateImage(definition); } else { - Properties.AddOrUpdateTexture(binding, definition); + Properties.AddOrUpdateTexture(definition); } if (layer == 0) @@ -500,7 +500,7 @@ namespace Ryujinx.Graphics.Shader.Translation new StructureField(AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32, "data", Constants.ConstantBufferSize / 16), }); - Properties.AddOrUpdateConstantBuffer(binding, new BufferDefinition(BufferLayout.Std140, 0, binding, name, type)); + Properties.AddOrUpdateConstantBuffer(new(BufferLayout.Std140, 0, binding, name, type)); } private void AddNewStorageBuffer(int binding, string name) @@ -510,7 +510,7 @@ namespace Ryujinx.Graphics.Shader.Translation new StructureField(AggregateType.Array | AggregateType.U32, "data", 0), }); - Properties.AddOrUpdateStorageBuffer(binding, new BufferDefinition(BufferLayout.Std430, 1, binding, name, type)); + Properties.AddOrUpdateStorageBuffer(new(BufferLayout.Std430, 1, binding, name, type)); } public static string GetShaderStagePrefix(ShaderStage stage) diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs deleted file mode 100644 index 27b46867..00000000 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ /dev/null @@ -1,639 +0,0 @@ -using Ryujinx.Graphics.Shader.IntermediateRepresentation; -using Ryujinx.Graphics.Shader.StructuredIr; -using System; -using System.Collections.Generic; -using System.Numerics; - -namespace Ryujinx.Graphics.Shader.Translation -{ - class ShaderConfig - { - private const int ThreadsPerWarp = 32; - - public ShaderStage Stage { get; } - - public bool GpPassthrough { 
get; } - public bool LastInVertexPipeline { get; private set; } - - public bool HasLayerInputAttribute { get; private set; } - public int GpLayerInputAttribute { get; private set; } - public int ThreadsPerInputPrimitive { get; } - - public OutputTopology OutputTopology { get; } - - public int MaxOutputVertices { get; } - - public int LocalMemorySize { get; } - - public ImapPixelType[] ImapTypes { get; } - - public int OmapTargets { get; } - public bool OmapSampleMask { get; } - public bool OmapDepth { get; } - - public IGpuAccessor GpuAccessor { get; } - - public TranslationOptions Options { get; } - - public ShaderProperties Properties => ResourceManager.Properties; - - public ResourceManager ResourceManager { get; set; } - - public bool TransformFeedbackEnabled { get; } - - private TransformFeedbackOutput[] _transformFeedbackOutputs; - - readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable> - { - public IoVariable IoVariable { get; } - public int Location { get; } - public int Component { get; } - - public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0) - { - IoVariable = ioVariable; - Location = location; - Component = component; - } - - public override bool Equals(object other) - { - return other is TransformFeedbackVariable tfbVar && Equals(tfbVar); - } - - public bool Equals(TransformFeedbackVariable other) - { - return IoVariable == other.IoVariable && - Location == other.Location && - Component == other.Component; - } - - public override int GetHashCode() - { - return (int)IoVariable | (Location << 8) | (Component << 16); - } - - public override string ToString() - { - return $"{IoVariable}.{Location}.{Component}"; - } - } - - private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions; - - public int Size { get; private set; } - - public byte ClipDistancesWritten { get; private set; } - - public FeatureFlags UsedFeatures { get; private set; } 
- - public int Cb1DataSize { get; private set; } - - public bool LayerOutputWritten { get; private set; } - public int LayerOutputAttribute { get; private set; } - - public bool NextUsesFixedFuncAttributes { get; private set; } - public int UsedInputAttributes { get; private set; } - public int UsedOutputAttributes { get; private set; } - public HashSet<int> UsedInputAttributesPerPatch { get; } - public HashSet<int> UsedOutputAttributesPerPatch { get; } - public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; } - public int PassthroughAttributes { get; private set; } - private int _nextUsedInputAttributes; - private int _thisUsedInputAttributes; - private Dictionary<int, int> _perPatchAttributeLocations; - - public UInt128 NextInputAttributesComponents { get; private set; } - public UInt128 ThisInputAttributesComponents { get; private set; } - - public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options, int localMemorySize) - { - Stage = stage; - GpuAccessor = gpuAccessor; - Options = options; - LocalMemorySize = localMemorySize; - - _transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>(); - - TransformFeedbackEnabled = - stage != ShaderStage.Compute && - gpuAccessor.QueryTransformFeedbackEnabled() && - gpuAccessor.QueryHostSupportsTransformFeedback(); - - UsedInputAttributesPerPatch = new HashSet<int>(); - UsedOutputAttributesPerPatch = new HashSet<int>(); - - ShaderProperties properties; - - switch (stage) - { - case ShaderStage.Fragment: - bool originUpperLeft = options.TargetApi == TargetApi.Vulkan || gpuAccessor.QueryYNegateEnabled(); - properties = new ShaderProperties(originUpperLeft); - break; - default: - properties = new ShaderProperties(); - break; - } - - ResourceManager = new ResourceManager(stage, gpuAccessor, properties); - - if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled()) - { - StructureType 
tfeInfoStruct = new(new StructureField[] - { - new(AggregateType.Array | AggregateType.U32, "base_offset", 4), - new(AggregateType.U32, "vertex_count"), - }); - - BufferDefinition tfeInfoBuffer = new(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", tfeInfoStruct); - - properties.AddOrUpdateStorageBuffer(Constants.TfeInfoBinding, tfeInfoBuffer); - - StructureType tfeDataStruct = new(new StructureField[] - { - new(AggregateType.Array | AggregateType.U32, "data", 0), - }); - - for (int i = 0; i < Constants.TfeBuffersCount; i++) - { - int binding = Constants.TfeBufferBaseBinding + i; - BufferDefinition tfeDataBuffer = new(BufferLayout.Std430, 1, binding, $"tfe_data{i}", tfeDataStruct); - properties.AddOrUpdateStorageBuffer(binding, tfeDataBuffer); - } - } - } - - public ShaderConfig( - ShaderStage stage, - OutputTopology outputTopology, - int maxOutputVertices, - IGpuAccessor gpuAccessor, - TranslationOptions options) : this(stage, gpuAccessor, options, 0) - { - ThreadsPerInputPrimitive = 1; - OutputTopology = outputTopology; - MaxOutputVertices = maxOutputVertices; - } - - public ShaderConfig( - ShaderHeader header, - IGpuAccessor gpuAccessor, - TranslationOptions options) : this(header.Stage, gpuAccessor, options, GetLocalMemorySize(header)) - { - GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough; - ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive; - OutputTopology = header.OutputTopology; - MaxOutputVertices = header.MaxOutputVertexCount; - ImapTypes = header.ImapTypes; - OmapTargets = header.OmapTargets; - OmapSampleMask = header.OmapSampleMask; - OmapDepth = header.OmapDepth; - LastInVertexPipeline = header.Stage < ShaderStage.Fragment; - } - - private static int GetLocalMemorySize(ShaderHeader header) - { - return header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp); - } - - private void EnsureTransformFeedbackInitialized() - { - if 
(HasTransformFeedbackOutputs() && _transformFeedbackOutputs == null) - { - TransformFeedbackOutput[] transformFeedbackOutputs = new TransformFeedbackOutput[0xc0]; - ulong vecMap = 0UL; - - for (int tfbIndex = 0; tfbIndex < 4; tfbIndex++) - { - var locations = GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex); - var stride = GpuAccessor.QueryTransformFeedbackStride(tfbIndex); - - for (int i = 0; i < locations.Length; i++) - { - byte wordOffset = locations[i]; - if (wordOffset < 0xc0) - { - transformFeedbackOutputs[wordOffset] = new TransformFeedbackOutput(tfbIndex, i * 4, stride); - vecMap |= 1UL << (wordOffset / 4); - } - } - } - - _transformFeedbackOutputs = transformFeedbackOutputs; - - while (vecMap != 0) - { - int vecIndex = BitOperations.TrailingZeroCount(vecMap); - - for (int subIndex = 0; subIndex < 4; subIndex++) - { - int wordOffset = vecIndex * 4 + subIndex; - int byteOffset = wordOffset * 4; - - if (transformFeedbackOutputs[wordOffset].Valid) - { - IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, byteOffset, out int location); - int component = 0; - - if (HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true)) - { - component = subIndex; - } - - var transformFeedbackVariable = new TransformFeedbackVariable(ioVariable, location, component); - _transformFeedbackDefinitions.TryAdd(transformFeedbackVariable, transformFeedbackOutputs[wordOffset]); - } - } - - vecMap &= ~(1UL << vecIndex); - } - } - } - - public TransformFeedbackOutput[] GetTransformFeedbackOutputs() - { - EnsureTransformFeedbackInitialized(); - return _transformFeedbackOutputs; - } - - public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput) - { - EnsureTransformFeedbackInitialized(); - var transformFeedbackVariable = new TransformFeedbackVariable(ioVariable, location, component); - return 
_transformFeedbackDefinitions.TryGetValue(transformFeedbackVariable, out transformFeedbackOutput); - } - - private bool HasTransformFeedbackOutputs() - { - return TransformFeedbackEnabled && (LastInVertexPipeline || Stage == ShaderStage.Fragment); - } - - public bool HasTransformFeedbackOutputs(bool isOutput) - { - return TransformFeedbackEnabled && ((isOutput && LastInVertexPipeline) || (!isOutput && Stage == ShaderStage.Fragment)); - } - - public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput) - { - if (ioVariable == IoVariable.UserDefined) - { - return (!isOutput && !UsedFeatures.HasFlag(FeatureFlags.IaIndexing)) || - (isOutput && !UsedFeatures.HasFlag(FeatureFlags.OaIndexing)); - } - - return ioVariable == IoVariable.FragmentOutputColor; - } - - public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput) - { - if (ioVariable != IoVariable.UserDefined || !HasTransformFeedbackOutputs(isOutput)) - { - return false; - } - - return GetTransformFeedbackOutputComponents(location, component) == 1; - } - - public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset) - { - EnsureTransformFeedbackInitialized(); - - return _transformFeedbackOutputs[wordOffset]; - } - - public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component) - { - return GetTransformFeedbackOutput((AttributeConsts.UserAttributeBase / 4) + location * 4 + component); - } - - public int GetTransformFeedbackOutputComponents(int location, int component) - { - EnsureTransformFeedbackInitialized(); - - int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4; - int index = baseIndex + component; - int count = 1; - - for (; count < 4; count++) - { - ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1]; - ref var curr = ref _transformFeedbackOutputs[baseIndex + count]; - - int prevOffset = prev.Offset; - int currOffset = curr.Offset; - - if (!prev.Valid || 
!curr.Valid || prevOffset + 4 != currOffset) - { - break; - } - } - - if (baseIndex + count <= index) - { - return 1; - } - - return count; - } - - public AggregateType GetFragmentOutputColorType(int location) - { - return AggregateType.Vector4 | GpuAccessor.QueryFragmentOutputType(location).ToAggregateType(); - } - - public AggregateType GetUserDefinedType(int location, bool isOutput) - { - if ((!isOutput && UsedFeatures.HasFlag(FeatureFlags.IaIndexing)) || - (isOutput && UsedFeatures.HasFlag(FeatureFlags.OaIndexing))) - { - return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32; - } - - AggregateType type = AggregateType.Vector4; - - if (Stage == ShaderStage.Vertex && !isOutput) - { - type |= GpuAccessor.QueryAttributeType(location).ToAggregateType(); - } - else - { - type |= AggregateType.FP32; - } - - return type; - } - - public int GetDepthRegister() - { - // The depth register is always two registers after the last color output. - return BitOperations.PopCount((uint)OmapTargets) + 1; - } - - public uint ConstantBuffer1Read(int offset) - { - if (Cb1DataSize < offset + 4) - { - Cb1DataSize = offset + 4; - } - - return GpuAccessor.ConstantBuffer1Read(offset); - } - - public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1) - { - // When the formatted load extension is supported, we don't need to - // specify a format, we can just declare it without a format and the GPU will handle it. 
- if (GpuAccessor.QueryHostSupportsImageLoadFormatted()) - { - return TextureFormat.Unknown; - } - - var format = GpuAccessor.QueryTextureFormat(handle, cbufSlot); - - if (format == TextureFormat.Unknown) - { - GpuAccessor.Log($"Unknown format for texture {handle}."); - - format = TextureFormat.R8G8B8A8Unorm; - } - - return format; - } - - private static bool FormatSupportsAtomic(TextureFormat format) - { - return format == TextureFormat.R32Sint || format == TextureFormat.R32Uint; - } - - public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1) - { - // Atomic image instructions do not support GL_EXT_shader_image_load_formatted, - // and must have a type specified. Default to R32Sint if not available. - - var format = GpuAccessor.QueryTextureFormat(handle, cbufSlot); - - if (!FormatSupportsAtomic(format)) - { - GpuAccessor.Log($"Unsupported format for texture {handle}: {format}."); - - format = TextureFormat.R32Sint; - } - - return format; - } - - public void SizeAdd(int size) - { - Size += size; - } - - public void InheritFrom(ShaderConfig other) - { - ClipDistancesWritten |= other.ClipDistancesWritten; - UsedFeatures |= other.UsedFeatures; - - UsedInputAttributes |= other.UsedInputAttributes; - UsedOutputAttributes |= other.UsedOutputAttributes; - } - - public void SetLayerOutputAttribute(int attr) - { - LayerOutputWritten = true; - LayerOutputAttribute = attr; - } - - public void SetGeometryShaderLayerInputAttribute(int attr) - { - HasLayerInputAttribute = true; - GpLayerInputAttribute = attr; - } - - public void SetLastInVertexPipeline() - { - LastInVertexPipeline = true; - } - - public void SetInputUserAttributeFixedFunc(int index) - { - UsedInputAttributes |= 1 << index; - } - - public void SetOutputUserAttributeFixedFunc(int index) - { - UsedOutputAttributes |= 1 << index; - } - - public void SetInputUserAttribute(int index, int component) - { - int mask = 1 << index; - - UsedInputAttributes |= mask; - _thisUsedInputAttributes |= mask; - 
ThisInputAttributesComponents |= UInt128.One << (index * 4 + component); - } - - public void SetInputUserAttributePerPatch(int index) - { - UsedInputAttributesPerPatch.Add(index); - } - - public void SetOutputUserAttribute(int index) - { - UsedOutputAttributes |= 1 << index; - } - - public void SetOutputUserAttributePerPatch(int index) - { - UsedOutputAttributesPerPatch.Add(index); - } - - public void MergeFromtNextStage(ShaderConfig config) - { - NextInputAttributesComponents = config.ThisInputAttributesComponents; - NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch; - NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr); - MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch); - - if (UsedOutputAttributesPerPatch.Count != 0) - { - // Regular and per-patch input/output locations can't overlap, - // so we must assign on our location using unused regular input/output locations. - - Dictionary<int, int> locationsMap = new(); - - int freeMask = ~UsedOutputAttributes; - - foreach (int attr in UsedOutputAttributesPerPatch) - { - int location = BitOperations.TrailingZeroCount(freeMask); - if (location == 32) - { - config.GpuAccessor.Log($"No enough free locations for patch input/output 0x{attr:X}."); - break; - } - - locationsMap.Add(attr, location); - freeMask &= ~(1 << location); - } - - // Both stages must agree on the locations, so use the same "map" for both. - _perPatchAttributeLocations = locationsMap; - config._perPatchAttributeLocations = locationsMap; - } - - // We don't consider geometry shaders using the geometry shader passthrough feature - // as being the last because when this feature is used, it can't actually modify any of the outputs, - // so the stage that comes before it is the last one that can do modifications. 
- if (config.Stage != ShaderStage.Fragment && (config.Stage != ShaderStage.Geometry || !config.GpPassthrough)) - { - LastInVertexPipeline = false; - } - } - - public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch) - { - _nextUsedInputAttributes = mask; - - if (GpPassthrough) - { - PassthroughAttributes = mask & ~UsedOutputAttributes; - } - else - { - UsedOutputAttributes |= mask; - UsedOutputAttributesPerPatch.UnionWith(perPatch); - } - } - - public int GetPerPatchAttributeLocation(int index) - { - if (_perPatchAttributeLocations == null || !_perPatchAttributeLocations.TryGetValue(index, out int location)) - { - return index; - } - - return location; - } - - public bool IsUsedOutputAttribute(int attr) - { - // The check for fixed function attributes on the next stage is conservative, - // returning false if the output is just not used by the next stage is also valid. - if (NextUsesFixedFuncAttributes && - attr >= AttributeConsts.UserAttributeBase && - attr < AttributeConsts.UserAttributeEnd) - { - int index = (attr - AttributeConsts.UserAttributeBase) >> 4; - return (_nextUsedInputAttributes & (1 << index)) != 0; - } - - return true; - } - - public int GetFreeUserAttribute(bool isOutput, int index) - { - int useMask = isOutput ? 
_nextUsedInputAttributes : _thisUsedInputAttributes; - int bit = -1; - - while (useMask != -1) - { - bit = BitOperations.TrailingZeroCount(~useMask); - - if (bit == 32) - { - bit = -1; - break; - } - else if (index < 1) - { - break; - } - - useMask |= 1 << bit; - index--; - } - - return bit; - } - - public void SetAllInputUserAttributes() - { - UsedInputAttributes |= Constants.AllAttributesMask; - ThisInputAttributesComponents |= ~UInt128.Zero >> (128 - Constants.MaxAttributes * 4); - } - - public void SetAllOutputUserAttributes() - { - UsedOutputAttributes |= Constants.AllAttributesMask; - } - - public void SetClipDistanceWritten(int index) - { - ClipDistancesWritten |= (byte)(1 << index); - } - - public void SetUsedFeature(FeatureFlags flags) - { - UsedFeatures |= flags; - } - - public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None) - { - return new ShaderProgramInfo( - ResourceManager.GetConstantBufferDescriptors(), - ResourceManager.GetStorageBufferDescriptors(), - ResourceManager.GetTextureDescriptors(), - ResourceManager.GetImageDescriptors(), - identification, - GpLayerInputAttribute, - Stage, - UsedFeatures.HasFlag(FeatureFlags.FragCoordXY), - UsedFeatures.HasFlag(FeatureFlags.InstanceId), - UsedFeatures.HasFlag(FeatureFlags.DrawParameters), - UsedFeatures.HasFlag(FeatureFlags.RtLayer), - ClipDistancesWritten, - OmapTargets); - } - } -} diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs new file mode 100644 index 00000000..d278c42e --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs @@ -0,0 +1,315 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace Ryujinx.Graphics.Shader.Translation +{ + class ShaderDefinitions + { + private readonly GpuGraphicsState _graphicsState; + + public ShaderStage Stage { 
get; } + + public int ComputeLocalSizeX { get; } + public int ComputeLocalSizeY { get; } + public int ComputeLocalSizeZ { get; } + + public bool TessCw => _graphicsState.TessCw; + public TessPatchType TessPatchType => _graphicsState.TessPatchType; + public TessSpacing TessSpacing => _graphicsState.TessSpacing; + + public bool AlphaToCoverageDitherEnable => _graphicsState.AlphaToCoverageEnable && _graphicsState.AlphaToCoverageDitherEnable; + public bool ViewportTransformDisable => _graphicsState.ViewportTransformDisable; + + public bool DepthMode => _graphicsState.DepthMode; + + public float PointSize => _graphicsState.PointSize; + + public AlphaTestOp AlphaTestCompare => _graphicsState.AlphaTestCompare; + public float AlphaTestReference => _graphicsState.AlphaTestReference; + + public bool GpPassthrough { get; } + public bool LastInVertexPipeline { get; set; } + + public int ThreadsPerInputPrimitive { get; } + + public InputTopology InputTopology => _graphicsState.Topology; + public OutputTopology OutputTopology { get; } + + public int MaxOutputVertices { get; } + + public bool DualSourceBlend => _graphicsState.DualSourceBlendEnable; + public bool EarlyZForce => _graphicsState.EarlyZForce; + + public bool YNegateEnabled => _graphicsState.YNegateEnabled; + public bool OriginUpperLeft => _graphicsState.OriginUpperLeft; + + public ImapPixelType[] ImapTypes { get; } + public bool IaIndexing { get; private set; } + public bool OaIndexing { get; private set; } + + public int OmapTargets { get; } + public bool OmapSampleMask { get; } + public bool OmapDepth { get; } + + public bool TransformFeedbackEnabled { get; } + + private readonly TransformFeedbackOutput[] _transformFeedbackOutputs; + + readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable> + { + public IoVariable IoVariable { get; } + public int Location { get; } + public int Component { get; } + + public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0) 
+ { + IoVariable = ioVariable; + Location = location; + Component = component; + } + + public override bool Equals(object other) + { + return other is TransformFeedbackVariable tfbVar && Equals(tfbVar); + } + + public bool Equals(TransformFeedbackVariable other) + { + return IoVariable == other.IoVariable && + Location == other.Location && + Component == other.Component; + } + + public override int GetHashCode() + { + return (int)IoVariable | (Location << 8) | (Component << 16); + } + + public override string ToString() + { + return $"{IoVariable}.{Location}.{Component}"; + } + } + + private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions; + + public ShaderDefinitions(ShaderStage stage) + { + Stage = stage; + } + + public ShaderDefinitions( + ShaderStage stage, + int computeLocalSizeX, + int computeLocalSizeY, + int computeLocalSizeZ) + { + Stage = stage; + ComputeLocalSizeX = computeLocalSizeX; + ComputeLocalSizeY = computeLocalSizeY; + ComputeLocalSizeZ = computeLocalSizeZ; + } + + public ShaderDefinitions( + ShaderStage stage, + GpuGraphicsState graphicsState, + bool gpPassthrough, + int threadsPerInputPrimitive, + OutputTopology outputTopology, + int maxOutputVertices) + { + Stage = stage; + _graphicsState = graphicsState; + GpPassthrough = gpPassthrough; + ThreadsPerInputPrimitive = threadsPerInputPrimitive; + OutputTopology = outputTopology; + MaxOutputVertices = maxOutputVertices; + } + + public ShaderDefinitions( + ShaderStage stage, + GpuGraphicsState graphicsState, + bool gpPassthrough, + int threadsPerInputPrimitive, + OutputTopology outputTopology, + int maxOutputVertices, + ImapPixelType[] imapTypes, + int omapTargets, + bool omapSampleMask, + bool omapDepth, + bool transformFeedbackEnabled, + ulong transformFeedbackVecMap, + TransformFeedbackOutput[] transformFeedbackOutputs) + { + Stage = stage; + _graphicsState = graphicsState; + GpPassthrough = gpPassthrough; + ThreadsPerInputPrimitive = 
threadsPerInputPrimitive; + OutputTopology = outputTopology; + MaxOutputVertices = maxOutputVertices; + ImapTypes = imapTypes; + OmapTargets = omapTargets; + OmapSampleMask = omapSampleMask; + OmapDepth = omapDepth; + LastInVertexPipeline = stage < ShaderStage.Fragment; + TransformFeedbackEnabled = transformFeedbackEnabled; + _transformFeedbackOutputs = transformFeedbackOutputs; + _transformFeedbackDefinitions = new(); + + while (transformFeedbackVecMap != 0) + { + int vecIndex = BitOperations.TrailingZeroCount(transformFeedbackVecMap); + + for (int subIndex = 0; subIndex < 4; subIndex++) + { + int wordOffset = vecIndex * 4 + subIndex; + int byteOffset = wordOffset * 4; + + if (transformFeedbackOutputs[wordOffset].Valid) + { + IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, byteOffset, out int location); + int component = 0; + + if (HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true)) + { + component = subIndex; + } + + var transformFeedbackVariable = new TransformFeedbackVariable(ioVariable, location, component); + _transformFeedbackDefinitions.TryAdd(transformFeedbackVariable, transformFeedbackOutputs[wordOffset]); + } + } + + transformFeedbackVecMap &= ~(1UL << vecIndex); + } + } + + public void EnableInputIndexing() + { + IaIndexing = true; + } + + public void EnableOutputIndexing() + { + OaIndexing = true; + } + + public TransformFeedbackOutput[] GetTransformFeedbackOutputs() + { + if (!HasTransformFeedbackOutputs()) + { + return null; + } + + return _transformFeedbackOutputs; + } + + public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput) + { + if (!HasTransformFeedbackOutputs()) + { + transformFeedbackOutput = default; + return false; + } + + var transformFeedbackVariable = new TransformFeedbackVariable(ioVariable, location, component); + return _transformFeedbackDefinitions.TryGetValue(transformFeedbackVariable, 
out transformFeedbackOutput); + } + + private bool HasTransformFeedbackOutputs() + { + return TransformFeedbackEnabled && (LastInVertexPipeline || Stage == ShaderStage.Fragment); + } + + public bool HasTransformFeedbackOutputs(bool isOutput) + { + return TransformFeedbackEnabled && ((isOutput && LastInVertexPipeline) || (!isOutput && Stage == ShaderStage.Fragment)); + } + + public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput) + { + if (ioVariable == IoVariable.UserDefined) + { + return (!isOutput && !IaIndexing) || (isOutput && !OaIndexing); + } + + return ioVariable == IoVariable.FragmentOutputColor; + } + + public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput) + { + if (ioVariable != IoVariable.UserDefined || !HasTransformFeedbackOutputs(isOutput)) + { + return false; + } + + return GetTransformFeedbackOutputComponents(location, component) == 1; + } + + public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset) + { + return _transformFeedbackOutputs[wordOffset]; + } + + public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component) + { + return GetTransformFeedbackOutput((AttributeConsts.UserAttributeBase / 4) + location * 4 + component); + } + + public int GetTransformFeedbackOutputComponents(int location, int component) + { + int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4; + int index = baseIndex + component; + int count = 1; + + for (; count < 4; count++) + { + ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1]; + ref var curr = ref _transformFeedbackOutputs[baseIndex + count]; + + int prevOffset = prev.Offset; + int currOffset = curr.Offset; + + if (!prev.Valid || !curr.Valid || prevOffset + 4 != currOffset) + { + break; + } + } + + if (baseIndex + count <= index) + { + return 1; + } + + return count; + } + + public AggregateType GetFragmentOutputColorType(int location) + { + return 
AggregateType.Vector4 | _graphicsState.FragmentOutputTypes[location].ToAggregateType(); + } + + public AggregateType GetUserDefinedType(int location, bool isOutput) + { + if ((!isOutput && IaIndexing) || (isOutput && OaIndexing)) + { + return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32; + } + + AggregateType type = AggregateType.Vector4; + + if (Stage == ShaderStage.Vertex && !isOutput) + { + type |= _graphicsState.AttributeTypes[location].ToAggregateType(); + } + else + { + type |= AggregateType.FP32; + } + + return type; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs index e9c25994..c077e1cd 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs @@ -5,18 +5,22 @@ namespace Ryujinx.Graphics.Shader.Translation { static class ShaderIdentifier { - public static ShaderIdentification Identify(IReadOnlyList<Function> functions, ShaderConfig config) + public static ShaderIdentification Identify( + IReadOnlyList<Function> functions, + IGpuAccessor gpuAccessor, + ShaderStage stage, + InputTopology inputTopology, + out int layerInputAttr) { - if (config.Stage == ShaderStage.Geometry && - config.GpuAccessor.QueryPrimitiveTopology() == InputTopology.Triangles && - !config.GpuAccessor.QueryHostSupportsGeometryShader() && - IsLayerPassthroughGeometryShader(functions, out int layerInputAttr)) + if (stage == ShaderStage.Geometry && + inputTopology == InputTopology.Triangles && + !gpuAccessor.QueryHostSupportsGeometryShader() && + IsLayerPassthroughGeometryShader(functions, out layerInputAttr)) { - config.SetGeometryShaderLayerInputAttribute(layerInputAttr); - return ShaderIdentification.GeometryLayerPassthrough; } + layerInputAttr = 0; return ShaderIdentification.None; } diff --git a/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs 
b/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs new file mode 100644 index 00000000..fa687eca --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs @@ -0,0 +1,33 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; + +namespace Ryujinx.Graphics.Shader.Translation +{ + readonly ref struct TransformContext + { + public readonly HelperFunctionManager Hfm; + public readonly BasicBlock[] Blocks; + public readonly ResourceManager ResourceManager; + public readonly IGpuAccessor GpuAccessor; + public readonly TargetLanguage TargetLanguage; + public readonly ShaderStage Stage; + public readonly ref FeatureFlags UsedFeatures; + + public TransformContext( + HelperFunctionManager hfm, + BasicBlock[] blocks, + ResourceManager resourceManager, + IGpuAccessor gpuAccessor, + TargetLanguage targetLanguage, + ShaderStage stage, + ref FeatureFlags usedFeatures) + { + Hfm = hfm; + Blocks = blocks; + ResourceManager = resourceManager; + GpuAccessor = gpuAccessor; + TargetLanguage = targetLanguage; + Stage = stage; + UsedFeatures = ref usedFeatures; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/TransformFeedbackOutput.cs b/src/Ryujinx.Graphics.Shader/Translation/TransformFeedbackOutput.cs new file mode 100644 index 00000000..7d5c7462 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/TransformFeedbackOutput.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Shader.Translation +{ + readonly struct TransformFeedbackOutput + { + public readonly bool Valid; + public readonly int Buffer; + public readonly int Offset; + public readonly int Stride; + + public TransformFeedbackOutput(int buffer, int offset, int stride) + { + Valid = true; + Buffer = buffer; + Offset = offset; + Stride = stride; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/DrawParametersReplace.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/DrawParametersReplace.cs new file mode 100644 index 00000000..9e73013d --- 
/dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/DrawParametersReplace.cs @@ -0,0 +1,93 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System.Collections.Generic; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class DrawParametersReplace : ITransformPass + { + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) + { + return stage == ShaderStage.Vertex; + } + + public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node) + { + Operation operation = (Operation)node.Value; + + if (context.GpuAccessor.QueryHasConstantBufferDrawParameters()) + { + if (ReplaceConstantBufferWithDrawParameters(node, operation)) + { + context.UsedFeatures |= FeatureFlags.DrawParameters; + } + } + else if (HasConstantBufferDrawParameters(operation)) + { + context.UsedFeatures |= FeatureFlags.DrawParameters; + } + + return node; + } + + private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation) + { + Operand GenerateLoad(IoVariable ioVariable) + { + Operand value = Local(); + node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, value, Const((int)ioVariable))); + return value; + } + + bool modified = false; + + for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) + { + Operand src = operation.GetSource(srcIndex); + + if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0) + { + switch (src.GetCbufOffset()) + { + case Constants.NvnBaseVertexByteOffset / 4: + operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseVertex)); + modified = true; + break; + case Constants.NvnBaseInstanceByteOffset / 4: + operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseInstance)); + modified = true; + break; + case Constants.NvnDrawIndexByteOffset / 
4: + operation.SetSource(srcIndex, GenerateLoad(IoVariable.DrawIndex)); + modified = true; + break; + } + } + } + + return modified; + } + + private static bool HasConstantBufferDrawParameters(Operation operation) + { + for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) + { + Operand src = operation.GetSource(srcIndex); + + if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0) + { + switch (src.GetCbufOffset()) + { + case Constants.NvnBaseVertexByteOffset / 4: + case Constants.NvnBaseInstanceByteOffset / 4: + case Constants.NvnDrawIndexByteOffset / 4: + return true; + } + } + } + + return false; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/ForcePreciseEnable.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/ForcePreciseEnable.cs new file mode 100644 index 00000000..6b7e1410 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/ForcePreciseEnable.cs @@ -0,0 +1,36 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class ForcePreciseEnable : ITransformPass + { + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) + { + return stage == ShaderStage.Fragment && gpuAccessor.QueryHostReducedPrecision(); + } + + public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node) + { + // There are some cases where a small bias is added to values to prevent division by zero. + // When operating with reduced precision, it is possible for this bias to get rounded to 0 + // and cause a division by zero. + // To prevent that, we force those operations to be precise even if the host wants + // imprecise operations for performance. 
+ + Operation operation = (Operation)node.Value; + + if (operation.Inst == (Instruction.FP32 | Instruction.Divide) && + operation.GetSource(0).Type == OperandType.Constant && + operation.GetSource(0).AsFloat() == 1f && + operation.GetSource(1).AsgOp is Operation addOp && + addOp.Inst == (Instruction.FP32 | Instruction.Add) && + addOp.GetSource(1).Type == OperandType.Constant) + { + addOp.ForcePrecise = true; + } + + return node; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/ITransformPass.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/ITransformPass.cs new file mode 100644 index 00000000..0a109d1d --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/ITransformPass.cs @@ -0,0 +1,11 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + interface ITransformPass + { + abstract static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures); + abstract static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node); + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedAtomicSignedCas.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedAtomicSignedCas.cs new file mode 100644 index 00000000..112b3b19 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedAtomicSignedCas.cs @@ -0,0 +1,58 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation.Optimizations; +using System.Collections.Generic; +using System.Diagnostics; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class SharedAtomicSignedCas : ITransformPass + { + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, 
FeatureFlags usedFeatures) + { + return targetLanguage != TargetLanguage.Spirv && stage == ShaderStage.Compute && usedFeatures.HasFlag(FeatureFlags.SharedMemory); + } + + public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node) + { + Operation operation = (Operation)node.Value; + HelperFunctionName name; + + if (operation.Inst == Instruction.AtomicMaxS32) + { + name = HelperFunctionName.SharedAtomicMaxS32; + } + else if (operation.Inst == Instruction.AtomicMinS32) + { + name = HelperFunctionName.SharedAtomicMinS32; + } + else + { + return node; + } + + if (operation.StorageKind != StorageKind.SharedMemory) + { + return node; + } + + Operand result = operation.Dest; + Operand memoryId = operation.GetSource(0); + Operand byteOffset = operation.GetSource(1); + Operand value = operation.GetSource(2); + + Debug.Assert(memoryId.Type == OperandType.Constant); + + int functionId = context.Hfm.GetOrCreateFunctionId(name, memoryId.Value); + + Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value }; + + LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, result, callArgs)); + + Utils.DeleteNode(node, operation); + + return newNode; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedStoreSmallIntCas.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedStoreSmallIntCas.cs new file mode 100644 index 00000000..e58be0a8 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/SharedStoreSmallIntCas.cs @@ -0,0 +1,57 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation.Optimizations; +using System.Collections.Generic; +using System.Diagnostics; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class SharedStoreSmallIntCas : ITransformPass + { + public static bool 
IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) + { + return stage == ShaderStage.Compute && usedFeatures.HasFlag(FeatureFlags.SharedMemory); + } + + public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node) + { + Operation operation = (Operation)node.Value; + HelperFunctionName name; + + if (operation.StorageKind == StorageKind.SharedMemory8) + { + name = HelperFunctionName.SharedStore8; + } + else if (operation.StorageKind == StorageKind.SharedMemory16) + { + name = HelperFunctionName.SharedStore16; + } + else + { + return node; + } + + if (operation.Inst != Instruction.Store) + { + return node; + } + + Operand memoryId = operation.GetSource(0); + Operand byteOffset = operation.GetSource(1); + Operand value = operation.GetSource(2); + + Debug.Assert(memoryId.Type == OperandType.Constant); + + int functionId = context.Hfm.GetOrCreateFunctionId(name, memoryId.Value); + + Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value }; + + LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs)); + + Utils.DeleteNode(node, operation); + + return newNode; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs index 0fa75203..5ceed4b7 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs @@ -1,268 +1,45 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; -using Ryujinx.Graphics.Shader.StructuredIr; -using Ryujinx.Graphics.Shader.Translation.Optimizations; using System.Collections.Generic; -using System.Diagnostics; using System.Linq; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; -namespace Ryujinx.Graphics.Shader.Translation +namespace Ryujinx.Graphics.Shader.Translation.Transforms 
{ - static class Rewriter + class TexturePass : ITransformPass { - public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config) + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) { - bool isVertexShader = config.Stage == ShaderStage.Vertex; - bool isImpreciseFragmentShader = config.Stage == ShaderStage.Fragment && config.GpuAccessor.QueryHostReducedPrecision(); - bool hasConstantBufferDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters(); - bool hasVectorIndexingBug = config.GpuAccessor.QueryHostHasVectorIndexingBug(); - bool supportsSnormBufferTextureFormat = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat(); - - for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) - { - BasicBlock block = blocks[blkIndex]; - - for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next) - { - if (node.Value is not Operation operation) - { - continue; - } - - if (isVertexShader) - { - if (hasConstantBufferDrawParameters) - { - if (ReplaceConstantBufferWithDrawParameters(node, operation)) - { - config.SetUsedFeature(FeatureFlags.DrawParameters); - } - } - else if (HasConstantBufferDrawParameters(operation)) - { - config.SetUsedFeature(FeatureFlags.DrawParameters); - } - } - - if (isImpreciseFragmentShader) - { - EnableForcePreciseIfNeeded(operation); - } - - if (hasVectorIndexingBug) - { - InsertVectorComponentSelect(node, config); - } - - if (operation is TextureOperation texOp) - { - node = InsertTexelFetchScale(hfm, node, config); - node = InsertTextureSizeUnscale(hfm, node, config); - - if (texOp.Inst == Instruction.TextureSample) - { - node = InsertCoordNormalization(hfm, node, config); - node = InsertCoordGatherBias(node, config); - node = InsertConstOffsets(node, config); - - if (texOp.Type == SamplerType.TextureBuffer && !supportsSnormBufferTextureFormat) - { - node = 
InsertSnormNormalization(node, config); - } - } - } - else - { - node = InsertSharedStoreSmallInt(hfm, node); - - if (config.Options.TargetLanguage != TargetLanguage.Spirv) - { - node = InsertSharedAtomicSigned(hfm, node); - } - } - } - } + return true; } - private static void EnableForcePreciseIfNeeded(Operation operation) + public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node) { - // There are some cases where a small bias is added to values to prevent division by zero. - // When operating with reduced precision, it is possible for this bias to get rounded to 0 - // and cause a division by zero. - // To prevent that, we force those operations to be precise even if the host wants - // imprecise operations for performance. - - if (operation.Inst == (Instruction.FP32 | Instruction.Divide) && - operation.GetSource(0).Type == OperandType.Constant && - operation.GetSource(0).AsFloat() == 1f && - operation.GetSource(1).AsgOp is Operation addOp && - addOp.Inst == (Instruction.FP32 | Instruction.Add) && - addOp.GetSource(1).Type == OperandType.Constant) + if (node.Value is TextureOperation texOp) { - addOp.ForcePrecise = true; - } - } + node = InsertTexelFetchScale(context.Hfm, node, context.ResourceManager, context.Stage); + node = InsertTextureSizeUnscale(context.Hfm, node, context.ResourceManager, context.Stage); - private static void InsertVectorComponentSelect(LinkedListNode<INode> node, ShaderConfig config) - { - Operation operation = (Operation)node.Value; - - if (operation.Inst != Instruction.Load || - operation.StorageKind != StorageKind.ConstantBuffer || - operation.SourcesCount < 3) - { - return; - } - - Operand bindingIndex = operation.GetSource(0); - Operand fieldIndex = operation.GetSource(1); - Operand elemIndex = operation.GetSource(operation.SourcesCount - 1); - - if (bindingIndex.Type != OperandType.Constant || - fieldIndex.Type != OperandType.Constant || - elemIndex.Type == OperandType.Constant) - { - return; 
- } - - BufferDefinition buffer = config.Properties.ConstantBuffers[bindingIndex.Value]; - StructureField field = buffer.Type.Fields[fieldIndex.Value]; - - int elemCount = (field.Type & AggregateType.ElementCountMask) switch - { - AggregateType.Vector2 => 2, - AggregateType.Vector3 => 3, - AggregateType.Vector4 => 4, - _ => 1, - }; - - if (elemCount == 1) - { - return; - } - - Operand result = null; - - for (int i = 0; i < elemCount; i++) - { - Operand value = Local(); - Operand[] inputs = new Operand[operation.SourcesCount]; - - for (int srcIndex = 0; srcIndex < inputs.Length - 1; srcIndex++) - { - inputs[srcIndex] = operation.GetSource(srcIndex); - } - - inputs[^1] = Const(i); - - Operation loadOp = new(Instruction.Load, StorageKind.ConstantBuffer, value, inputs); - - node.List.AddBefore(node, loadOp); - - if (i == 0) - { - result = value; - } - else + if (texOp.Inst == Instruction.TextureSample) { - Operand isCurrentIndex = Local(); - Operand selection = Local(); + node = InsertCoordNormalization(context.Hfm, node, context.ResourceManager, context.GpuAccessor, context.Stage); + node = InsertCoordGatherBias(node, context.ResourceManager, context.GpuAccessor); + node = InsertConstOffsets(node, context.ResourceManager, context.GpuAccessor); - Operation compareOp = new(Instruction.CompareEqual, isCurrentIndex, new Operand[] { elemIndex, Const(i) }); - Operation selectOp = new(Instruction.ConditionalSelect, selection, new Operand[] { isCurrentIndex, value, result }); - - node.List.AddBefore(node, compareOp); - node.List.AddBefore(node, selectOp); - - result = selection; + if (texOp.Type == SamplerType.TextureBuffer && !context.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat()) + { + node = InsertSnormNormalization(node, context.ResourceManager, context.GpuAccessor); + } } } - operation.TurnIntoCopy(result); - } - - private static LinkedListNode<INode> InsertSharedStoreSmallInt(HelperFunctionManager hfm, LinkedListNode<INode> node) - { - Operation operation = 
(Operation)node.Value; - HelperFunctionName name; - - if (operation.StorageKind == StorageKind.SharedMemory8) - { - name = HelperFunctionName.SharedStore8; - } - else if (operation.StorageKind == StorageKind.SharedMemory16) - { - name = HelperFunctionName.SharedStore16; - } - else - { - return node; - } - - if (operation.Inst != Instruction.Store) - { - return node; - } - - Operand memoryId = operation.GetSource(0); - Operand byteOffset = operation.GetSource(1); - Operand value = operation.GetSource(2); - - Debug.Assert(memoryId.Type == OperandType.Constant); - - int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value); - - Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value }; - - LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs)); - - Utils.DeleteNode(node, operation); - - return newNode; - } - - private static LinkedListNode<INode> InsertSharedAtomicSigned(HelperFunctionManager hfm, LinkedListNode<INode> node) - { - Operation operation = (Operation)node.Value; - HelperFunctionName name; - - if (operation.Inst == Instruction.AtomicMaxS32) - { - name = HelperFunctionName.SharedAtomicMaxS32; - } - else if (operation.Inst == Instruction.AtomicMinS32) - { - name = HelperFunctionName.SharedAtomicMinS32; - } - else - { - return node; - } - - if (operation.StorageKind != StorageKind.SharedMemory) - { - return node; - } - - Operand result = operation.Dest; - Operand memoryId = operation.GetSource(0); - Operand byteOffset = operation.GetSource(1); - Operand value = operation.GetSource(2); - - Debug.Assert(memoryId.Type == OperandType.Constant); - - int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value); - - Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value }; - - LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, result, callArgs)); - - Utils.DeleteNode(node, operation); - - return newNode; + return 
node; } - private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config) + private static LinkedListNode<INode> InsertTexelFetchScale( + HelperFunctionManager hfm, + LinkedListNode<INode> node, + ResourceManager resourceManager, + ShaderStage stage) { TextureOperation texOp = (TextureOperation)node.Value; @@ -280,20 +57,20 @@ namespace Ryujinx.Graphics.Shader.Translation (intCoords || isImage) && !isBindless && !isIndexed && - config.Stage.SupportsRenderScale() && + stage.SupportsRenderScale() && TypeSupportsScale(texOp.Type)) { int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TexelFetchScale); int samplerIndex = isImage - ? config.ResourceManager.GetTextureDescriptors().Length + config.ResourceManager.FindImageDescriptorIndex(texOp.Binding) - : config.ResourceManager.FindTextureDescriptorIndex(texOp.Binding); + ? resourceManager.GetTextureDescriptors().Length + resourceManager.FindImageDescriptorIndex(texOp.Binding) + : resourceManager.FindTextureDescriptorIndex(texOp.Binding); for (int index = 0; index < coordsCount; index++) { Operand scaledCoord = Local(); Operand[] callArgs; - if (config.Stage == ShaderStage.Fragment) + if (stage == ShaderStage.Fragment) { callArgs = new Operand[] { Const(functionId), texOp.GetSource(coordsIndex + index), Const(samplerIndex), Const(index) }; } @@ -311,7 +88,11 @@ namespace Ryujinx.Graphics.Shader.Translation return node; } - private static LinkedListNode<INode> InsertTextureSizeUnscale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config) + private static LinkedListNode<INode> InsertTextureSizeUnscale( + HelperFunctionManager hfm, + LinkedListNode<INode> node, + ResourceManager resourceManager, + ShaderStage stage) { TextureOperation texOp = (TextureOperation)node.Value; @@ -322,11 +103,11 @@ namespace Ryujinx.Graphics.Shader.Translation texOp.Index < 2 && !isBindless && !isIndexed && - config.Stage.SupportsRenderScale() 
&& + stage.SupportsRenderScale() && TypeSupportsScale(texOp.Type)) { int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TextureSizeUnscale); - int samplerIndex = config.ResourceManager.FindTextureDescriptorIndex(texOp.Binding); + int samplerIndex = resourceManager.FindTextureDescriptorIndex(texOp.Binding); for (int index = texOp.DestsCount - 1; index >= 0; index--) { @@ -356,19 +137,12 @@ namespace Ryujinx.Graphics.Shader.Translation return node; } - private static bool IsImageInstructionWithScale(Instruction inst) - { - // Currently, we don't support scaling images that are modified, - // so we only need to care about the load instruction. - return inst == Instruction.ImageLoad; - } - - private static bool TypeSupportsScale(SamplerType type) - { - return (type & SamplerType.Mask) == SamplerType.Texture2D; - } - - private static LinkedListNode<INode> InsertCoordNormalization(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config) + private static LinkedListNode<INode> InsertCoordNormalization( + HelperFunctionManager hfm, + LinkedListNode<INode> node, + ResourceManager resourceManager, + IGpuAccessor gpuAccessor, + ShaderStage stage) { // Emulate non-normalized coordinates by normalizing the coordinates on the shader. 
// Without normalization, the coordinates are expected to be in the [0, W or H] range, @@ -386,9 +160,9 @@ namespace Ryujinx.Graphics.Shader.Translation bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; - (int cbufSlot, int handle) = config.ResourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding); + (int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding); - bool isCoordNormalized = config.GpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot); + bool isCoordNormalized = gpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot); if (isCoordNormalized || intCoords) { @@ -400,8 +174,6 @@ namespace Ryujinx.Graphics.Shader.Translation int coordsCount = texOp.Type.GetDimensions(); int coordsIndex = isBindless || isIndexed ? 1 : 0; - config.SetUsedFeature(FeatureFlags.IntegerSampling); - int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount; for (int index = 0; index < normCoordsCount; index++) @@ -429,7 +201,7 @@ namespace Ryujinx.Graphics.Shader.Translation new[] { coordSize }, texSizeSources)); - config.ResourceManager.SetUsageFlagsForTextureQuery(texOp.Binding, texOp.Type); + resourceManager.SetUsageFlagsForTextureQuery(texOp.Binding, texOp.Type); Operand source = texOp.GetSource(coordsIndex + index); @@ -439,13 +211,13 @@ namespace Ryujinx.Graphics.Shader.Translation texOp.SetSource(coordsIndex + index, coordNormalized); - InsertTextureSizeUnscale(hfm, textureSizeNode, config); + InsertTextureSizeUnscale(hfm, textureSizeNode, resourceManager, stage); } return node; } - private static LinkedListNode<INode> InsertCoordGatherBias(LinkedListNode<INode> node, ShaderConfig config) + private static LinkedListNode<INode> InsertCoordGatherBias(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor) { // The gather behavior when the coordinate sits right in the middle of two texels is not well defined. 
// To ensure the correct texel is sampled, we add a small bias value to the coordinate. @@ -457,25 +229,18 @@ namespace Ryujinx.Graphics.Shader.Translation bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; - int gatherBiasPrecision = config.GpuAccessor.QueryHostGatherBiasPrecision(); + int gatherBiasPrecision = gpuAccessor.QueryHostGatherBiasPrecision(); if (!isGather || gatherBiasPrecision == 0) { return node; } -#pragma warning disable IDE0059 // Remove unnecessary value assignment - bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; - - bool isArray = (texOp.Type & SamplerType.Array) != 0; bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; -#pragma warning restore IDE0059 int coordsCount = texOp.Type.GetDimensions(); int coordsIndex = isBindless || isIndexed ? 1 : 0; - config.SetUsedFeature(FeatureFlags.IntegerSampling); - int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount; for (int index = 0; index < normCoordsCount; index++) @@ -524,7 +289,7 @@ namespace Ryujinx.Graphics.Shader.Translation return node; } - private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ShaderConfig config) + private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor) { // Non-constant texture offsets are not allowed (according to the spec), // however some GPUs do support that. 
@@ -540,7 +305,7 @@ namespace Ryujinx.Graphics.Shader.Translation bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; - bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset(); + bool hasInvalidOffset = (hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset(); bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; @@ -673,8 +438,6 @@ namespace Ryujinx.Graphics.Shader.Translation if (isGather && !isShadow) { - config.SetUsedFeature(FeatureFlags.IntegerSampling); - Operand[] newSources = new Operand[sources.Length]; sources.CopyTo(newSources, 0); @@ -741,8 +504,6 @@ namespace Ryujinx.Graphics.Shader.Translation } else { - config.SetUsedFeature(FeatureFlags.IntegerSampling); - Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount); for (int index = 0; index < coordsCount; index++) @@ -840,7 +601,7 @@ namespace Ryujinx.Graphics.Shader.Translation return texSizes; } - private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config) + private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor) { TextureOperation texOp = (TextureOperation)node.Value; @@ -851,9 +612,9 @@ namespace Ryujinx.Graphics.Shader.Translation return node; } - (int cbufSlot, int handle) = config.ResourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding); + (int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding); - TextureFormat format = config.GpuAccessor.QueryTextureFormat(handle, cbufSlot); + TextureFormat format = gpuAccessor.QueryTextureFormat(handle, cbufSlot); int maxPositive = format switch { @@ -926,63 +687,16 @@ namespace Ryujinx.Graphics.Shader.Translation return res; } - private static bool 
ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation) + private static bool IsImageInstructionWithScale(Instruction inst) { - Operand GenerateLoad(IoVariable ioVariable) - { - Operand value = Local(); - node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, value, Const((int)ioVariable))); - return value; - } - - bool modified = false; - - for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) - { - Operand src = operation.GetSource(srcIndex); - - if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0) - { - switch (src.GetCbufOffset()) - { - case Constants.NvnBaseVertexByteOffset / 4: - operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseVertex)); - modified = true; - break; - case Constants.NvnBaseInstanceByteOffset / 4: - operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseInstance)); - modified = true; - break; - case Constants.NvnDrawIndexByteOffset / 4: - operation.SetSource(srcIndex, GenerateLoad(IoVariable.DrawIndex)); - modified = true; - break; - } - } - } - - return modified; + // Currently, we don't support scaling images that are modified, + // so we only need to care about the load instruction. 
+ return inst == Instruction.ImageLoad; } - private static bool HasConstantBufferDrawParameters(Operation operation) + private static bool TypeSupportsScale(SamplerType type) { - for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) - { - Operand src = operation.GetSource(srcIndex); - - if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0) - { - switch (src.GetCbufOffset()) - { - case Constants.NvnBaseVertexByteOffset / 4: - case Constants.NvnBaseInstanceByteOffset / 4: - case Constants.NvnDrawIndexByteOffset / 4: - return true; - } - } - } - - return false; + return (type & SamplerType.Mask) == SamplerType.Texture2D; } } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs new file mode 100644 index 00000000..c3bbe7dd --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs @@ -0,0 +1,41 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + static class TransformPasses + { + public static void RunPass(TransformContext context) + { + RunPass<DrawParametersReplace>(context); + RunPass<ForcePreciseEnable>(context); + RunPass<VectorComponentSelect>(context); + RunPass<TexturePass>(context); + RunPass<SharedStoreSmallIntCas>(context); + RunPass<SharedAtomicSignedCas>(context); + } + + private static void RunPass<T>(TransformContext context) where T : ITransformPass + { + if (!T.IsEnabled(context.GpuAccessor, context.Stage, context.TargetLanguage, context.UsedFeatures)) + { + return; + } + + for (int blkIndex = 0; blkIndex < context.Blocks.Length; blkIndex++) + { + BasicBlock block = context.Blocks[blkIndex]; + + for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next) + { + if (node.Value is not Operation) + { + continue; + } + + node = T.RunPass(context, node); + 
} + } + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/VectorComponentSelect.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/VectorComponentSelect.cs new file mode 100644 index 00000000..e55f4355 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/VectorComponentSelect.cs @@ -0,0 +1,96 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using System.Collections.Generic; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class VectorComponentSelect : ITransformPass + { + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) + { + return gpuAccessor.QueryHostHasVectorIndexingBug(); + } + + public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node) + { + Operation operation = (Operation)node.Value; + + if (operation.Inst != Instruction.Load || + operation.StorageKind != StorageKind.ConstantBuffer || + operation.SourcesCount < 3) + { + return node; + } + + Operand bindingIndex = operation.GetSource(0); + Operand fieldIndex = operation.GetSource(1); + Operand elemIndex = operation.GetSource(operation.SourcesCount - 1); + + if (bindingIndex.Type != OperandType.Constant || + fieldIndex.Type != OperandType.Constant || + elemIndex.Type == OperandType.Constant) + { + return node; + } + + BufferDefinition buffer = context.ResourceManager.Properties.ConstantBuffers[bindingIndex.Value]; + StructureField field = buffer.Type.Fields[fieldIndex.Value]; + + int elemCount = (field.Type & AggregateType.ElementCountMask) switch + { + AggregateType.Vector2 => 2, + AggregateType.Vector3 => 3, + AggregateType.Vector4 => 4, + _ => 1 + }; + + if (elemCount == 1) + { + return node; + } + + Operand result = null; + + for (int i = 0; i < elemCount; i++) + { + Operand value = 
Local(); + Operand[] inputs = new Operand[operation.SourcesCount]; + + for (int srcIndex = 0; srcIndex < inputs.Length - 1; srcIndex++) + { + inputs[srcIndex] = operation.GetSource(srcIndex); + } + + inputs[^1] = Const(i); + + Operation loadOp = new(Instruction.Load, StorageKind.ConstantBuffer, value, inputs); + + node.List.AddBefore(node, loadOp); + + if (i == 0) + { + result = value; + } + else + { + Operand isCurrentIndex = Local(); + Operand selection = Local(); + + Operation compareOp = new(Instruction.CompareEqual, isCurrentIndex, new Operand[] { elemIndex, Const(i) }); + Operation selectOp = new(Instruction.ConditionalSelect, selection, new Operand[] { isCurrentIndex, value, result }); + + node.List.AddBefore(node, compareOp); + node.List.AddBefore(node, selectOp); + + result = selection; + } + } + + operation.TurnIntoCopy(result); + + return node; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs index 010c80db..b609ac07 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -1,11 +1,6 @@ -using Ryujinx.Graphics.Shader.CodeGen.Glsl; -using Ryujinx.Graphics.Shader.CodeGen.Spirv; using Ryujinx.Graphics.Shader.Decoders; using Ryujinx.Graphics.Shader.IntermediateRepresentation; -using Ryujinx.Graphics.Shader.StructuredIr; -using Ryujinx.Graphics.Shader.Translation.Optimizations; using System; -using System.Collections.Generic; using System.Linq; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; @@ -13,6 +8,7 @@ namespace Ryujinx.Graphics.Shader.Translation { public static class Translator { + private const int ThreadsPerWarp = 32; private const int HeaderSize = 0x50; internal readonly struct FunctionCode @@ -30,111 +26,112 @@ namespace Ryujinx.Graphics.Shader.Translation return DecodeShader(address, gpuAccessor, options); } - internal static ShaderProgram 
Translate(FunctionCode[] functions, ShaderConfig config) + private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options) { - var cfgs = new ControlFlowGraph[functions.Length]; - var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length]; + int localMemorySize; + ShaderDefinitions definitions; + DecodedProgram program; - for (int i = 0; i < functions.Length; i++) + if (options.Flags.HasFlag(TranslationFlags.Compute)) { - cfgs[i] = ControlFlowGraph.Create(functions[i].Code); + definitions = CreateComputeDefinitions(gpuAccessor); + localMemorySize = gpuAccessor.QueryComputeLocalMemorySize(); - if (i != 0) - { - frus[i] = RegisterUsage.RunPass(cfgs[i]); - } + program = Decoder.Decode(definitions, gpuAccessor, address); } + else + { + ShaderHeader header = new(gpuAccessor, address); - List<Function> funcs = new(functions.Length); + definitions = CreateGraphicsDefinitions(gpuAccessor, header); + localMemorySize = GetLocalMemorySize(header); - for (int i = 0; i < functions.Length; i++) - { - funcs.Add(null); + program = Decoder.Decode(definitions, gpuAccessor, address + HeaderSize); } - HelperFunctionManager hfm = new(funcs, config.Stage); + ulong maxEndAddress = 0; - for (int i = 0; i < functions.Length; i++) + foreach (DecodedFunction function in program) { - var cfg = cfgs[i]; - - int inArgumentsCount = 0; - int outArgumentsCount = 0; - - if (i != 0) - { - var fru = frus[i]; - - inArgumentsCount = fru.InArguments.Length; - outArgumentsCount = fru.OutArguments.Length; - } - - if (cfg.Blocks.Length != 0) + foreach (Block block in function.Blocks) { - RegisterUsage.FixupCalls(cfg.Blocks, frus); - - Dominance.FindDominators(cfg); - Dominance.FindDominanceFrontiers(cfg.Blocks); - - Ssa.Rename(cfg.Blocks); - - Optimizer.RunPass(hfm, cfg.Blocks, config); - Rewriter.RunPass(hfm, cfg.Blocks, config); + if (maxEndAddress < block.EndAddress) + { + maxEndAddress = block.EndAddress; + } } - - funcs[i] = new 
Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount); } - var identification = ShaderIdentifier.Identify(funcs, config); - - var sInfo = StructuredProgram.MakeStructuredProgram(funcs, config); - - var info = config.CreateProgramInfo(identification); + int size = (int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize); - return config.Options.TargetLanguage switch - { - TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)), - TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)), - _ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()), - }; + return new TranslatorContext(address, size, localMemorySize, definitions, gpuAccessor, options, program); } - private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options) + private static ShaderDefinitions CreateComputeDefinitions(IGpuAccessor gpuAccessor) { - ShaderConfig config; - DecodedProgram program; - ulong maxEndAddress = 0; + return new ShaderDefinitions( + ShaderStage.Compute, + gpuAccessor.QueryComputeLocalSizeX(), + gpuAccessor.QueryComputeLocalSizeY(), + gpuAccessor.QueryComputeLocalSizeZ()); + } - if (options.Flags.HasFlag(TranslationFlags.Compute)) - { - config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options, gpuAccessor.QueryComputeLocalMemorySize()); + private static ShaderDefinitions CreateGraphicsDefinitions(IGpuAccessor gpuAccessor, ShaderHeader header) + { + bool transformFeedbackEnabled = + gpuAccessor.QueryTransformFeedbackEnabled() && + gpuAccessor.QueryHostSupportsTransformFeedback(); + TransformFeedbackOutput[] transformFeedbackOutputs = null; + ulong transformFeedbackVecMap = 0UL; - program = Decoder.Decode(config, address); - } - else + if (transformFeedbackEnabled) { - config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, 
options); - - program = Decoder.Decode(config, address + HeaderSize); - } + transformFeedbackOutputs = new TransformFeedbackOutput[0xc0]; - foreach (DecodedFunction function in program) - { - foreach (Block block in function.Blocks) + for (int tfbIndex = 0; tfbIndex < 4; tfbIndex++) { - if (maxEndAddress < block.EndAddress) + var locations = gpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex); + var stride = gpuAccessor.QueryTransformFeedbackStride(tfbIndex); + + for (int i = 0; i < locations.Length; i++) { - maxEndAddress = block.EndAddress; + byte wordOffset = locations[i]; + if (wordOffset < 0xc0) + { + transformFeedbackOutputs[wordOffset] = new TransformFeedbackOutput(tfbIndex, i * 4, stride); + transformFeedbackVecMap |= 1UL << (wordOffset / 4); + } } } } - config.SizeAdd((int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize)); + return new ShaderDefinitions( + header.Stage, + gpuAccessor.QueryGraphicsState(), + header.Stage == ShaderStage.Geometry && header.GpPassthrough, + header.ThreadsPerInputPrimitive, + header.OutputTopology, + header.MaxOutputVertexCount, + header.ImapTypes, + header.OmapTargets, + header.OmapSampleMask, + header.OmapDepth, + transformFeedbackEnabled, + transformFeedbackVecMap, + transformFeedbackOutputs); + } - return new TranslatorContext(address, program, config); + private static int GetLocalMemorySize(ShaderHeader header) + { + return header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp); } - internal static FunctionCode[] EmitShader(DecodedProgram program, ShaderConfig config, bool initializeOutputs, out int initializationOperations) + internal static FunctionCode[] EmitShader( + TranslatorContext translatorContext, + ResourceManager resourceManager, + DecodedProgram program, + bool initializeOutputs, + out int initializationOperations) { initializationOperations = 0; @@ -149,11 +146,11 @@ namespace 
Ryujinx.Graphics.Shader.Translation for (int index = 0; index < functions.Length; index++) { - EmitterContext context = new(program, config, index != 0); + EmitterContext context = new(translatorContext, resourceManager, program, index != 0); if (initializeOutputs && index == 0) { - EmitOutputsInitialization(context, config); + EmitOutputsInitialization(context, translatorContext.AttributeUsage, translatorContext.GpuAccessor, translatorContext.Stage); initializationOperations = context.OperationsCount; } @@ -168,27 +165,27 @@ namespace Ryujinx.Graphics.Shader.Translation EmitOps(context, block); } - functions[index] = new FunctionCode(context.GetOperations()); + functions[index] = new(context.GetOperations()); } return functions; } - private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config) + private static void EmitOutputsInitialization(EmitterContext context, AttributeUsage attributeUsage, IGpuAccessor gpuAccessor, ShaderStage stage) { // Compute has no output attributes, and fragment is the last stage, so we // don't need to initialize outputs on those stages. - if (config.Stage == ShaderStage.Compute || config.Stage == ShaderStage.Fragment) + if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment) { return; } - if (config.Stage == ShaderStage.Vertex) + if (stage == ShaderStage.Vertex) { InitializePositionOutput(context); } - UInt128 usedAttributes = context.Config.NextInputAttributesComponents; + UInt128 usedAttributes = context.TranslatorContext.AttributeUsage.NextInputAttributesComponents; while (usedAttributes != UInt128.Zero) { int index = (int)UInt128.TrailingZeroCount(usedAttributes); @@ -197,7 +194,7 @@ namespace Ryujinx.Graphics.Shader.Translation usedAttributes &= ~(UInt128.One << index); // We don't need to initialize passthrough attributes. 
- if ((context.Config.PassthroughAttributes & (1 << vecIndex)) != 0) + if ((context.TranslatorContext.AttributeUsage.PassthroughAttributes & (1 << vecIndex)) != 0) { continue; } @@ -205,30 +202,28 @@ namespace Ryujinx.Graphics.Shader.Translation InitializeOutputComponent(context, vecIndex, index & 3, perPatch: false); } - if (context.Config.NextUsedInputAttributesPerPatch != null) + if (context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch != null) { - foreach (int vecIndex in context.Config.NextUsedInputAttributesPerPatch.Order()) + foreach (int vecIndex in context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch.Order()) { InitializeOutput(context, vecIndex, perPatch: true); } } - if (config.NextUsesFixedFuncAttributes) + if (attributeUsage.NextUsesFixedFuncAttributes) { - bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation(); + bool supportsLayerFromVertexOrTess = gpuAccessor.QueryHostSupportsLayerVertexTessellation(); int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1; for (int i = fixedStartAttr; i < fixedStartAttr + 5 + AttributeConsts.TexCoordCount; i++) { - int index = config.GetFreeUserAttribute(isOutput: true, i); + int index = attributeUsage.GetFreeUserAttribute(isOutput: true, i); if (index < 0) { break; } InitializeOutput(context, index, perPatch: false); - - config.SetOutputUserAttributeFixedFunc(index); } } } @@ -253,11 +248,11 @@ namespace Ryujinx.Graphics.Shader.Translation { StorageKind storageKind = perPatch ? 
StorageKind.OutputPerPatch : StorageKind.Output; - if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing)) + if (context.TranslatorContext.Definitions.OaIndexing) { Operand invocationId = null; - if (context.Config.Stage == ShaderStage.TessellationControl && !perPatch) + if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch) { invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId); } @@ -268,7 +263,7 @@ namespace Ryujinx.Graphics.Shader.Translation } else { - if (context.Config.Stage == ShaderStage.TessellationControl && !perPatch) + if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch) { Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId); context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(c == 3 ? 1f : 0f)); @@ -286,7 +281,7 @@ namespace Ryujinx.Graphics.Shader.Translation { InstOp op = block.OpCodes[opIndex]; - if (context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode)) + if (context.TranslatorContext.Options.Flags.HasFlag(TranslationFlags.DebugMode)) { string instName; @@ -298,7 +293,7 @@ namespace Ryujinx.Graphics.Shader.Translation { instName = "???"; - context.Config.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16})."); + context.TranslatorContext.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16})."); } string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}"; diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index 40a79c54..39ce92c9 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -1,8 +1,11 @@ -using Ryujinx.Graphics.Shader.CodeGen.Glsl; +using Ryujinx.Graphics.Shader.CodeGen; +using 
Ryujinx.Graphics.Shader.CodeGen.Glsl; using Ryujinx.Graphics.Shader.CodeGen.Spirv; using Ryujinx.Graphics.Shader.Decoders; using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation.Optimizations; +using Ryujinx.Graphics.Shader.Translation.Transforms; using System; using System.Collections.Generic; using System.Linq; @@ -15,22 +18,47 @@ namespace Ryujinx.Graphics.Shader.Translation public class TranslatorContext { private readonly DecodedProgram _program; - private readonly ShaderConfig _config; + private readonly int _localMemorySize; public ulong Address { get; } + public int Size { get; } + public int Cb1DataSize => _program.Cb1DataSize; - public ShaderStage Stage => _config.Stage; - public int Size => _config.Size; - public int Cb1DataSize => _config.Cb1DataSize; - public bool LayerOutputWritten => _config.LayerOutputWritten; + internal bool HasLayerInputAttribute { get; private set; } + internal int GpLayerInputAttribute { get; private set; } - public IGpuAccessor GpuAccessor => _config.GpuAccessor; + internal AttributeUsage AttributeUsage => _program.AttributeUsage; - internal TranslatorContext(ulong address, DecodedProgram program, ShaderConfig config) + internal ShaderDefinitions Definitions { get; } + + public ShaderStage Stage => Definitions.Stage; + + internal IGpuAccessor GpuAccessor { get; } + + internal TranslationOptions Options { get; } + + internal FeatureFlags UsedFeatures { get; private set; } + + public bool LayerOutputWritten { get; private set; } + public int LayerOutputAttribute { get; private set; } + + internal TranslatorContext( + ulong address, + int size, + int localMemorySize, + ShaderDefinitions definitions, + IGpuAccessor gpuAccessor, + TranslationOptions options, + DecodedProgram program) { Address = address; + Size = size; _program = program; - _config = config; + _localMemorySize = localMemorySize; + Definitions = definitions; + GpuAccessor = 
gpuAccessor; + Options = options; + UsedFeatures = program.UsedFeatures; } private static bool IsLoadUserDefined(Operation operation) @@ -131,63 +159,259 @@ namespace Ryujinx.Graphics.Shader.Translation return output; } - public void SetNextStage(TranslatorContext nextStage) + internal int GetDepthRegister() + { + // The depth register is always two registers after the last color output. + return BitOperations.PopCount((uint)Definitions.OmapTargets) + 1; + } + + public void SetLayerOutputAttribute(int attr) { - _config.MergeFromtNextStage(nextStage._config); + LayerOutputWritten = true; + LayerOutputAttribute = attr; } public void SetGeometryShaderLayerInputAttribute(int attr) { - _config.SetGeometryShaderLayerInputAttribute(attr); + UsedFeatures |= FeatureFlags.RtLayer; + HasLayerInputAttribute = true; + GpLayerInputAttribute = attr; } public void SetLastInVertexPipeline() { - _config.SetLastInVertexPipeline(); + Definitions.LastInVertexPipeline = true; } - public ShaderProgram Translate(TranslatorContext other = null) + public void SetNextStage(TranslatorContext nextStage) { - bool usesLocalMemory = _config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); + AttributeUsage.MergeFromtNextStage( + Definitions.GpPassthrough, + nextStage.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr), + nextStage.AttributeUsage); + + // We don't consider geometry shaders using the geometry shader passthrough feature + // as being the last because when this feature is used, it can't actually modify any of the outputs, + // so the stage that comes before it is the last one that can do modifications. 
+ if (nextStage.Definitions.Stage != ShaderStage.Fragment && + (nextStage.Definitions.Stage != ShaderStage.Geometry || !nextStage.Definitions.GpPassthrough)) + { + Definitions.LastInVertexPipeline = false; + } + } - _config.ResourceManager.SetCurrentLocalMemory(_config.LocalMemorySize, usesLocalMemory); + public ShaderProgram Translate() + { + ResourceManager resourceManager = CreateResourceManager(); - if (_config.Stage == ShaderStage.Compute) + bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); + + resourceManager.SetCurrentLocalMemory(_localMemorySize, usesLocalMemory); + + if (Stage == ShaderStage.Compute) { - bool usesSharedMemory = _config.UsedFeatures.HasFlag(FeatureFlags.SharedMemory); + bool usesSharedMemory = _program.UsedFeatures.HasFlag(FeatureFlags.SharedMemory); - _config.ResourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory); + resourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory); } - FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: other == null, out _); + FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: true, out _); + + return Translate(code, resourceManager, UsedFeatures, _program.ClipDistancesWritten); + } - if (other != null) + public ShaderProgram Translate(TranslatorContext other) + { + ResourceManager resourceManager = CreateResourceManager(); + + bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); + resourceManager.SetCurrentLocalMemory(_localMemorySize, usesLocalMemory); + + FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: false, out _); + + bool otherUsesLocalMemory = other._program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); + resourceManager.SetCurrentLocalMemory(other._localMemorySize, otherUsesLocalMemory); + + FunctionCode[] otherCode = EmitShader(other, resourceManager, other._program, 
initializeOutputs: true, out int aStart); + + code = Combine(otherCode, code, aStart); + + return Translate( + code, + resourceManager, + UsedFeatures | other.UsedFeatures, + (byte)(_program.ClipDistancesWritten | other._program.ClipDistancesWritten)); + } + + private ShaderProgram Translate(FunctionCode[] functions, ResourceManager resourceManager, FeatureFlags usedFeatures, byte clipDistancesWritten) + { + var cfgs = new ControlFlowGraph[functions.Length]; + var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length]; + + for (int i = 0; i < functions.Length; i++) { - other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes, Enumerable.Empty<int>()); + cfgs[i] = ControlFlowGraph.Create(functions[i].Code); - // We need to share the resource manager since both shaders accesses the same constant buffers. - other._config.ResourceManager = _config.ResourceManager; - other._config.ResourceManager.SetCurrentLocalMemory(other._config.LocalMemorySize, other._config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory)); + if (i != 0) + { + frus[i] = RegisterUsage.RunPass(cfgs[i]); + } + } - FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int aStart); + List<Function> funcs = new(functions.Length); - code = Combine(otherCode, code, aStart); + for (int i = 0; i < functions.Length; i++) + { + funcs.Add(null); + } - _config.InheritFrom(other._config); + HelperFunctionManager hfm = new(funcs, Definitions.Stage); + + for (int i = 0; i < functions.Length; i++) + { + var cfg = cfgs[i]; + + int inArgumentsCount = 0; + int outArgumentsCount = 0; + + if (i != 0) + { + var fru = frus[i]; + + inArgumentsCount = fru.InArguments.Length; + outArgumentsCount = fru.OutArguments.Length; + } + + if (cfg.Blocks.Length != 0) + { + RegisterUsage.FixupCalls(cfg.Blocks, frus); + + Dominance.FindDominators(cfg); + Dominance.FindDominanceFrontiers(cfg.Blocks); + + Ssa.Rename(cfg.Blocks); + + TransformContext context = new( + hfm, + 
cfg.Blocks, + resourceManager, + GpuAccessor, + Options.TargetLanguage, + Definitions.Stage, + ref usedFeatures); + + Optimizer.RunPass(context); + TransformPasses.RunPass(context); + } + + funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount); + } + + var identification = ShaderIdentifier.Identify(funcs, GpuAccessor, Definitions.Stage, Definitions.InputTopology, out int layerInputAttr); + + return Generate( + funcs, + AttributeUsage, + Definitions, + resourceManager, + usedFeatures, + clipDistancesWritten, + identification, + layerInputAttr); + } + + private ShaderProgram Generate( + IReadOnlyList<Function> funcs, + AttributeUsage attributeUsage, + ShaderDefinitions definitions, + ResourceManager resourceManager, + FeatureFlags usedFeatures, + byte clipDistancesWritten, + ShaderIdentification identification = ShaderIdentification.None, + int layerInputAttr = 0) + { + var sInfo = StructuredProgram.MakeStructuredProgram( + funcs, + attributeUsage, + definitions, + resourceManager, + Options.Flags.HasFlag(TranslationFlags.DebugMode)); + + var info = new ShaderProgramInfo( + resourceManager.GetConstantBufferDescriptors(), + resourceManager.GetStorageBufferDescriptors(), + resourceManager.GetTextureDescriptors(), + resourceManager.GetImageDescriptors(), + identification, + layerInputAttr, + definitions.Stage, + usedFeatures.HasFlag(FeatureFlags.FragCoordXY), + usedFeatures.HasFlag(FeatureFlags.InstanceId), + usedFeatures.HasFlag(FeatureFlags.DrawParameters), + usedFeatures.HasFlag(FeatureFlags.RtLayer), + clipDistancesWritten, + definitions.OmapTargets); + + var hostCapabilities = new HostCapabilities( + GpuAccessor.QueryHostReducedPrecision(), + GpuAccessor.QueryHostSupportsFragmentShaderInterlock(), + GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(), + GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(), + GpuAccessor.QueryHostSupportsShaderBallot(), + GpuAccessor.QueryHostSupportsShaderBarrierDivergence(), + 
GpuAccessor.QueryHostSupportsTextureShadowLod(), + GpuAccessor.QueryHostSupportsViewportMask()); + + var parameters = new CodeGenParameters(attributeUsage, definitions, resourceManager.Properties, hostCapabilities, GpuAccessor, Options.TargetApi); + + return Options.TargetLanguage switch + { + TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, parameters)), + TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, parameters)), + _ => throw new NotImplementedException(Options.TargetLanguage.ToString()), + }; + } + + private ResourceManager CreateResourceManager() + { + ResourceManager resourceManager = new(Definitions.Stage, GpuAccessor); + + if (!GpuAccessor.QueryHostSupportsTransformFeedback() && GpuAccessor.QueryTransformFeedbackEnabled()) + { + StructureType tfeInfoStruct = new(new StructureField[] + { + new StructureField(AggregateType.Array | AggregateType.U32, "base_offset", 4), + new StructureField(AggregateType.U32, "vertex_count") + }); + + BufferDefinition tfeInfoBuffer = new(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", tfeInfoStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(tfeInfoBuffer); + + StructureType tfeDataStruct = new(new StructureField[] + { + new StructureField(AggregateType.Array | AggregateType.U32, "data", 0) + }); + + for (int i = 0; i < Constants.TfeBuffersCount; i++) + { + int binding = Constants.TfeBufferBaseBinding + i; + BufferDefinition tfeDataBuffer = new(BufferLayout.Std430, 1, binding, $"tfe_data{i}", tfeDataStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(tfeDataBuffer); + } } - return Translator.Translate(code, _config); + return resourceManager; } public ShaderProgram GenerateGeometryPassthrough() { - int outputAttributesMask = _config.UsedOutputAttributes; - int layerOutputAttr = _config.LayerOutputAttribute; + int outputAttributesMask = AttributeUsage.UsedOutputAttributes; + int 
layerOutputAttr = LayerOutputAttribute; OutputTopology outputTopology; int maxOutputVertices; - switch (GpuAccessor.QueryPrimitiveTopology()) + switch (Definitions.InputTopology) { case InputTopology.Points: outputTopology = OutputTopology.PointList; @@ -204,9 +428,10 @@ namespace Ryujinx.Graphics.Shader.Translation break; } - ShaderConfig config = new(ShaderStage.Geometry, outputTopology, maxOutputVertices, GpuAccessor, _config.Options); + var attributeUsage = new AttributeUsage(GpuAccessor); + var resourceManager = new ResourceManager(ShaderStage.Geometry, GpuAccessor); - EmitterContext context = new(default, config, false); + var context = new EmitterContext(); for (int v = 0; v < maxOutputVertices; v++) { @@ -231,10 +456,7 @@ namespace Ryujinx.Graphics.Shader.Translation else { context.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(attrIndex), Const(c), value); - config.SetOutputUserAttribute(attrIndex); } - - config.SetInputUserAttribute(attrIndex, c); } } @@ -254,16 +476,15 @@ namespace Ryujinx.Graphics.Shader.Translation var cfg = ControlFlowGraph.Create(operations); var function = new Function(cfg.Blocks, "main", false, 0, 0); - var sInfo = StructuredProgram.MakeStructuredProgram(new[] { function }, config); - - var info = config.CreateProgramInfo(); + var definitions = new ShaderDefinitions( + ShaderStage.Geometry, + GpuAccessor.QueryGraphicsState(), + false, + 1, + outputTopology, + maxOutputVertices); - return config.Options.TargetLanguage switch - { - TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)), - TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)), - _ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()), - }; + return Generate(new[] { function }, attributeUsage, definitions, resourceManager, FeatureFlags.RtLayer, 0); } } } |