diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs')
-rw-r--r-- | src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs | 1149 |
1 files changed, 903 insertions, 246 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index 7758b4c6..14904b26 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -1,483 +1,1140 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System; using System.Collections.Generic; +using System.Linq; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; -using static Ryujinx.Graphics.Shader.Translation.GlobalMemory; namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class GlobalToStorage { - private struct SearchResult + private const int DriverReservedCb = 0; + + enum LsMemoryType { - public static SearchResult NotFound => new SearchResult(-1, 0); - public bool Found => SbCbSlot != -1; - public int SbCbSlot { get; } - public int SbCbOffset { get; } + Local, + Shared + } - public SearchResult(int sbCbSlot, int sbCbOffset) + private class GtsContext + { + private struct Entry { - SbCbSlot = sbCbSlot; - SbCbOffset = sbCbOffset; + public readonly int FunctionId; + public readonly Instruction Inst; + public readonly StorageKind StorageKind; + public readonly bool IsMultiTarget; + public readonly IReadOnlyList<uint> TargetCbs; + + public Entry( + int functionId, + Instruction inst, + StorageKind storageKind, + bool isMultiTarget, + IReadOnlyList<uint> targetCbs) + { + FunctionId = functionId; + Inst = inst; + StorageKind = storageKind; + IsMultiTarget = isMultiTarget; + TargetCbs = targetCbs; + } } - } - public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask) - { - int sbStart = GetStorageBaseCbOffset(config.Stage); - int sbEnd = sbStart + StorageDescsSize; + private struct LsKey : IEquatable<LsKey> + { + public readonly Operand BaseOffset; + public readonly int ConstOffset; + public readonly LsMemoryType Type; - int ubeStart = UbeBaseOffset; - int ubeEnd = UbeBaseOffset + UbeDescsSize; + public LsKey(Operand baseOffset, int constOffset, LsMemoryType type) + { + BaseOffset = baseOffset; + ConstOffset = constOffset; + Type = type; + } - for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next) - { - for (int index = 0; index < node.Value.SourcesCount; index++) + public override int GetHashCode() + { + return HashCode.Combine(BaseOffset, ConstOffset, Type); + } + + public override bool Equals(object obj) { - Operand src = node.Value.GetSource(index); + return obj is LsKey other && Equals(other); + } + + public bool Equals(LsKey other) + { + return other.BaseOffset == BaseOffset && other.ConstOffset == ConstOffset && other.Type == Type; + } + } + + private readonly List<Entry> _entries; + private readonly Dictionary<LsKey, Dictionary<uint, SearchResult>> _sharedEntries; + private readonly HelperFunctionManager _hfm; + + public GtsContext(HelperFunctionManager hfm) + { + _entries = new List<Entry>(); + _sharedEntries = new Dictionary<LsKey, Dictionary<uint, SearchResult>>(); + _hfm = hfm; + } + + public int AddFunction(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs, Function function) + { + int functionId = _hfm.AddFunction(function); - int storageIndex = GetStorageIndex(src, sbStart, sbEnd); + _entries.Add(new Entry(functionId, baseOp.Inst, baseOp.StorageKind, isMultiTarget, targetCbs)); - if (storageIndex >= 0) + return functionId; + } + + public bool TryGetFunctionId(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs, out int functionId) + { + foreach (Entry entry in _entries) + { + if (entry.Inst != baseOp.Inst || + entry.StorageKind != baseOp.StorageKind || + entry.IsMultiTarget != isMultiTarget || + entry.TargetCbs.Count != targetCbs.Count) { - sbUseMask |= 1 << storageIndex; + continue; } - if (config.Stage == ShaderStage.Compute) - { - int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd); + bool allEqual = true; - if (constantIndex >= 0) + for (int index = 0; index < targetCbs.Count; index++) + { + if (targetCbs[index] != entry.TargetCbs[index]) { - ubeUseMask |= 1 << constantIndex; + allEqual = false; + break; } } + + if (allEqual) + { + functionId = entry.FunctionId; + return true; + } + } + + functionId = -1; + return false; + } + + public void AddMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, uint targetCb, SearchResult result) + { + LsKey key = new LsKey(baseOffset, constOffset, type); + + if (!_sharedEntries.TryGetValue(key, out Dictionary<uint, SearchResult> targetCbs)) + { + // No entry with this base offset, create a new one. + + targetCbs = new Dictionary<uint, SearchResult>() { { targetCb, result } }; + + _sharedEntries.Add(key, targetCbs); } + else if (targetCbs.TryGetValue(targetCb, out SearchResult existingResult)) + { + // If our entry already exists, but does not match the new result, + // we set the offset to null to indicate there are multiple possible offsets. + // This will be used on the multi-target access that does not need to know the offset. - if (!(node.Value is Operation operation)) + if (existingResult.Offset != null && + (existingResult.Offset != result.Offset || + existingResult.ConstOffset != result.ConstOffset)) + { + targetCbs[targetCb] = new SearchResult(result.SbCbSlot, result.SbCbOffset); + } + } + else { - continue; + // An entry for this base offset already exists, but not for the specified + // constant buffer region where the storage buffer base address and size + // comes from. + + targetCbs.Add(targetCb, result); } + } + + public bool TryGetMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, out SearchResult result) + { + LsKey key = new LsKey(baseOffset, constOffset, type); - if (UsesGlobalMemory(operation.Inst, operation.StorageKind)) + if (_sharedEntries.TryGetValue(key, out Dictionary<uint, SearchResult> targetCbs) && targetCbs.Count == 1) { - Operand source = operation.GetSource(0); + SearchResult candidateResult = targetCbs.Values.First(); - var result = SearchForStorageBase(config, block, source); - if (!result.Found) + if (candidateResult.Found) + { + result = candidateResult; + + return true; + } + } + + result = default; + + return false; + } + } + + private struct SearchResult + { + public static SearchResult NotFound => new SearchResult(-1, 0); + public bool Found => SbCbSlot != -1; + public int SbCbSlot { get; } + public int SbCbOffset { get; } + public Operand Offset { get; } + public int ConstOffset { get; } + + public SearchResult(int sbCbSlot, int sbCbOffset) + { + SbCbSlot = sbCbSlot; + SbCbOffset = sbCbOffset; + } + + public SearchResult(int sbCbSlot, int sbCbOffset, Operand offset, int constOffset = 0) + { + SbCbSlot = sbCbSlot; + SbCbOffset = sbCbOffset; + Offset = offset; + ConstOffset = constOffset; + } + } + + public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config) + { + GtsContext gtsContext = new GtsContext(hfm); + + foreach (BasicBlock block in blocks) + { + for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next) + { + if (!(node.Value is Operation operation)) { continue; } - if (config.Stage == ShaderStage.Compute && - operation.Inst == Instruction.LoadGlobal && - result.SbCbSlot == DriverReservedCb && - result.SbCbOffset >= UbeBaseOffset && - result.SbCbOffset < UbeBaseOffset + UbeDescsSize) + if (IsGlobalMemory(operation.StorageKind)) { - // Here we effectively try to replace a LDG instruction with LDC. - // The hardware only supports a limited amount of constant buffers - // so NVN "emulates" more constant buffers using global memory access. - // Here we try to replace the global access back to a constant buffer - // load. - node = ReplaceLdgWithLdc(node, config, (result.SbCbOffset - UbeBaseOffset) / StorageDescSize); + LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage(gtsContext, config, block, node); + + if (nextNode == null) + { + // The returned value being null means that the global memory replacement failed, + // so we just make loads read 0 and stores do nothing. + + config.GpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\"."); + + if (operation.Dest != null) + { + operation.TurnIntoCopy(Const(0)); + } + else + { + Utils.DeleteNode(node, operation); + } + } + else + { + node = nextNode; + } } - else + else if (operation.Inst == Instruction.StoreShared || operation.Inst == Instruction.StoreLocal) { - // Storage buffers are implemented using global memory access. - // If we know from where the base address of the access is loaded, - // we can guess which storage buffer it is accessing. - // We can then replace the global memory access with a storage - // buffer access. - node = ReplaceGlobalWithStorage(block, node, config, config.GetSbSlot((byte)result.SbCbSlot, (ushort)result.SbCbOffset)); + // The NVIDIA compiler can sometimes use shared or local memory as temporary + // storage to place the base address and size on, so we need + // to be able to find such information stored in memory too. + + if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand baseOffset, out int constOffset)) + { + Operand value = operation.GetSource(operation.SourcesCount - 1); + + var result = FindUniqueBaseAddressCb(gtsContext, block, value, needsOffset: false); + if (result.Found) + { + uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset); + gtsContext.AddMemoryTargetCb(type, baseOffset, constOffset, targetCb, result); + } + } } } } + } - config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask); + private static bool IsGlobalMemory(StorageKind storageKind) + { + return storageKind == StorageKind.GlobalMemory || + storageKind == StorageKind.GlobalMemoryS8 || + storageKind == StorageKind.GlobalMemoryS16 || + storageKind == StorageKind.GlobalMemoryU8 || + storageKind == StorageKind.GlobalMemoryU16; } - private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex) + private static bool IsSmallInt(StorageKind storageKind) { - Operation operation = (Operation)node.Value; + return storageKind == StorageKind.GlobalMemoryS8 || + storageKind == StorageKind.GlobalMemoryS16 || + storageKind == StorageKind.GlobalMemoryU8 || + storageKind == StorageKind.GlobalMemoryU16; + } - bool isAtomic = operation.Inst.IsAtomic(); - bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8; - bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8; + private static LinkedListNode<INode> ReplaceGlobalMemoryWithStorage( + GtsContext gtsContext, + ShaderConfig config, + BasicBlock block, + LinkedListNode<INode> node) + { + Operation operation = node.Value as Operation; + Operand globalAddress = operation.GetSource(0); + SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, globalAddress, needsOffset: true); - config.SetUsedStorageBuffer(storageIndex, isWrite); + if (result.Found) + { + // We found the storage buffer that is being accessed. + // There are two possible paths here, if the operation is simple enough, + // we just generate the storage access code inline. + // Otherwise, we generate a function call (and the function if necessary). - Operand[] sources = new Operand[operation.SourcesCount]; + Operand offset = result.Offset; - sources[0] = Const(storageIndex); - sources[1] = GetStorageOffset(block, node, config, storageIndex, operation.GetSource(0), isStg16Or8); + bool storageUnaligned = config.GpuAccessor.QueryHasUnalignedStorageBuffer(); - for (int index = 2; index < operation.SourcesCount; index++) - { - sources[index] = operation.GetSource(index); + if (storageUnaligned) + { + Operand baseAddress = Cbuf(result.SbCbSlot, result.SbCbOffset); + + Operand baseAddressMasked = Local(); + Operand hostOffset = Local(); + + int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment(); + + Operation maskOp = new Operation(Instruction.BitwiseAnd, baseAddressMasked, new[] { baseAddress, Const(-alignment) }); + Operation subOp = new Operation(Instruction.Subtract, hostOffset, new[] { globalAddress, baseAddressMasked }); + + node.List.AddBefore(node, maskOp); + node.List.AddBefore(node, subOp); + + offset = hostOffset; + } + else if (result.ConstOffset != 0) + { + Operand newOffset = Local(); + + Operation addOp = new Operation(Instruction.Add, newOffset, new[] { offset, Const(result.ConstOffset) }); + + node.List.AddBefore(node, addOp); + + offset = newOffset; + } + + if (CanUseInlineStorageOp(operation, config.Options.TargetLanguage)) + { + return GenerateInlineStorageOp(config, node, operation, offset, result); + } + else + { + if (!TryGenerateSingleTargetStorageOp(gtsContext, config, operation, result, out int functionId)) + { + return null; + } + + return GenerateCallStorageOp(node, operation, offset, functionId); + } } + else + { + // Failed to find the storage buffer directly. + // Try to walk through Phi chains and find all possible constant buffers where + // the base address might be stored. + // Generate a helper function that will check all possible storage buffers and use the right one. - Operation storageOp; + if (!TryGenerateMultiTargetStorageOp(gtsContext, config, block, operation, out int functionId)) + { + return null; + } + + return GenerateCallStorageOp(node, operation, null, functionId); + } + } - if (isAtomic) + private static bool CanUseInlineStorageOp(Operation operation, TargetLanguage targetLanguage) + { + if (operation.StorageKind != StorageKind.GlobalMemory) { - storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); + return false; } - else if (operation.Inst == Instruction.LoadGlobal) + + return (operation.Inst != Instruction.AtomicMaxS32 && + operation.Inst != Instruction.AtomicMinS32) || targetLanguage == TargetLanguage.Spirv; + } + + private static LinkedListNode<INode> GenerateInlineStorageOp( + ShaderConfig config, + LinkedListNode<INode> node, + Operation operation, + Operand offset, + SearchResult result) + { + bool isStore = operation.Inst == Instruction.Store || operation.Inst.IsAtomic(); + if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) { - storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources); + return null; } - else + + Operand wordOffset = Local(); + + Operand[] sources; + + if (operation.Inst == Instruction.AtomicCompareAndSwap) { - Instruction storeInst = operation.Inst switch + sources = new Operand[] { - Instruction.StoreGlobal16 => Instruction.StoreStorage16, - Instruction.StoreGlobal8 => Instruction.StoreStorage8, - _ => Instruction.StoreStorage + Const(binding), + Const(0), + wordOffset, + operation.GetSource(operation.SourcesCount - 2), + operation.GetSource(operation.SourcesCount - 1) }; - - storageOp = new Operation(storeInst, null, sources); } - - for (int index = 0; index < operation.SourcesCount; index++) + else if (isStore) + { + sources = new Operand[] { Const(binding), Const(0), wordOffset, operation.GetSource(operation.SourcesCount - 1) }; + } + else { - operation.SetSource(index, null); + sources = new Operand[] { Const(binding), Const(0), wordOffset }; } - LinkedListNode<INode> oldNode = node; + Operation shiftOp = new Operation(Instruction.ShiftRightU32, wordOffset, new[] { offset, Const(2) }); + Operation storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); - node = node.List.AddBefore(node, storageOp); + node.List.AddBefore(node, shiftOp); + LinkedListNode<INode> newNode = node.List.AddBefore(node, storageOp); - node.List.Remove(oldNode); + Utils.DeleteNode(node, operation); - return node; + return newNode; } - private static Operand GetStorageOffset( - BasicBlock block, - LinkedListNode<INode> node, - ShaderConfig config, - int storageIndex, - Operand addrLow, - bool isStg16Or8) + private static LinkedListNode<INode> GenerateCallStorageOp(LinkedListNode<INode> node, Operation operation, Operand offset, int functionId) { - (int sbCbSlot, int sbCbOffset) = config.GetSbCbInfo(storageIndex); + // Generate call to a helper function that will perform the storage buffer operation. - bool storageAligned = !(config.GpuAccessor.QueryHasUnalignedStorageBuffer() || config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() > Constants.StorageAlignment); + Operand[] sources = new Operand[operation.SourcesCount - 1 + (offset == null ? 2 : 1)]; - (Operand byteOffset, int constantOffset) = storageAligned ? - GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), sbCbSlot, sbCbOffset) : - (null, 0); + sources[0] = Const(functionId); - if (byteOffset != null) + if (offset != null) { - ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset); + // If the offset was supplised, we use that and skip the global address. + + sources[1] = offset; + + for (int srcIndex = 2; srcIndex < operation.SourcesCount; srcIndex++) + { + sources[srcIndex] = operation.GetSource(srcIndex); + } } + else + { + // Use the 64-bit global address which is split in 2 32-bit arguments. - if (byteOffset == null) + for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) + { + sources[srcIndex + 1] = operation.GetSource(srcIndex); + } + } + + bool returnsValue = operation.Dest != null; + Operand returnValue = returnsValue ? Local() : null; + + Operation callOp = new Operation(Instruction.Call, returnValue, sources); + + LinkedListNode<INode> newNode = node.List.AddBefore(node, callOp); + + if (returnsValue) { - Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset); - Operand baseAddrTrunc = Local(); + operation.TurnIntoCopy(returnValue); - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); + return node; + } + else + { + Utils.DeleteNode(node, operation); - Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask); + return newNode; + } + } - node.List.AddBefore(node, andOp); + private static bool TryGenerateSingleTargetStorageOp( + GtsContext gtsContext, + ShaderConfig config, + Operation operation, + SearchResult result, + out int functionId) + { + List<uint> targetCbs = new List<uint>() { PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset) }; - Operand offset = Local(); - Operation subOp = new Operation(Instruction.Subtract, offset, addrLow, baseAddrTrunc); + if (gtsContext.TryGetFunctionId(operation, isMultiTarget: false, targetCbs, out functionId)) + { + return true; + } - node.List.AddBefore(node, subOp); + int inArgumentsCount = 1; - byteOffset = offset; + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inArgumentsCount = 3; } - else if (constantOffset != 0) + else if (operation.Inst == Instruction.Store || operation.Inst.IsAtomic()) { - Operand offset = Local(); - Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset)); + inArgumentsCount = 2; + } + + EmitterContext context = new EmitterContext(); + + Operand offset = Argument(0); + Operand compare = null; + Operand value = null; - node.List.AddBefore(node, addOp); + if (inArgumentsCount == 3) + { + compare = Argument(1); + value = Argument(2); + } + else if (inArgumentsCount == 2) + { + value = Argument(1); + } - byteOffset = offset; + if (!TryGenerateStorageOp( + config, + context, + operation.Inst, + operation.StorageKind, + offset, + compare, + value, + result, + out Operand resultValue)) + { + functionId = 0; + return false; } - if (isStg16Or8) + bool returnsValue = resultValue != null; + + if (returnsValue) { - return byteOffset; + context.Return(resultValue); + } + else + { + context.Return(); } - Operand wordOffset = Local(); - Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); + string functionName = GetFunctionName(operation, isMultiTarget: false, targetCbs); - node.List.AddBefore(node, shrOp); + Function function = new Function( + ControlFlowGraph.Create(context.GetOperations()).Blocks, + functionName, + returnsValue, + inArgumentsCount, + 0); - return wordOffset; - } + functionId = gtsContext.AddFunction(operation, isMultiTarget: false, targetCbs, function); - private static bool IsCbOffset(Operand operand, int slot, int offset) - { - return operand.Type == OperandType.ConstantBuffer && operand.GetCbufSlot() == slot && operand.GetCbufOffset() == offset; + return true; } - private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset) + private static bool TryGenerateMultiTargetStorageOp( + GtsContext gtsContext, + ShaderConfig config, + BasicBlock block, + Operation operation, + out int functionId) { - // When we emit 16/8-bit LDG, we add extra code to determine the address alignment. - // Eliminate the storage buffer base address from this too, leaving only the byte offset. + Queue<PhiNode> phis = new Queue<PhiNode>(); + HashSet<PhiNode> visited = new HashSet<PhiNode>(); + List<uint> targetCbs = new List<uint>(); + + Operand globalAddress = operation.GetSource(0); - foreach (INode useNode in address.UseOps) + if (globalAddress.AsgOp is Operation addOp && addOp.Inst == Instruction.Add) { - if (useNode is Operation op && op.Inst == Instruction.BitwiseAnd) + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable) { - Operand src1 = op.GetSource(0); - Operand src2 = op.GetSource(1); + globalAddress = src2; + } + else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) + { + globalAddress = src1; + } + } + + if (globalAddress.AsgOp is PhiNode phi && visited.Add(phi)) + { + phis.Enqueue(phi); + } + else + { + SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, operation.GetSource(0), needsOffset: false); - int addressIndex = -1; + if (result.Found) + { + targetCbs.Add(PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset)); + } + } - if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3) - { - addressIndex = 0; - } - else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3) - { - addressIndex = 1; - } + while (phis.TryDequeue(out phi)) + { + for (int srcIndex = 0; srcIndex < phi.SourcesCount; srcIndex++) + { + BasicBlock phiBlock = phi.GetBlock(srcIndex); + Operand phiSource = phi.GetSource(srcIndex); - if (addressIndex != -1) + SearchResult result = FindUniqueBaseAddressCb(gtsContext, phiBlock, phiSource, needsOffset: false); + + if (result.Found) { - LinkedListNode<INode> node = list.Find(op); + uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset); - // Add offset calculation before the use. Needs to be on the same block. - if (node != null) + if (!targetCbs.Contains(targetCb)) { - Operand offset = Local(); - Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset)); - list.AddBefore(node, addOp); - - op.SetSource(addressIndex, offset); + targetCbs.Add(targetCb); } } + else if (phiSource.AsgOp is PhiNode phi2 && visited.Add(phi2)) + { + phis.Enqueue(phi2); + } } } - } - private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int cbSlot, int baseAddressCbOffset) - { - if (IsCbOffset(address, cbSlot, baseAddressCbOffset)) + targetCbs.Sort(); + + if (targetCbs.Count == 0) { - // Direct offset: zero. - return (Const(0), 0); + config.GpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\"."); } - (address, int constantOffset) = GetStorageConstantOffset(block, address); + if (gtsContext.TryGetFunctionId(operation, isMultiTarget: true, targetCbs, out functionId)) + { + return true; + } - address = Utils.FindLastOperation(address, block); + int inArgumentsCount = 2; - if (IsCbOffset(address, cbSlot, baseAddressCbOffset)) + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inArgumentsCount = 4; + } + else if (operation.Inst == Instruction.Store || operation.Inst.IsAtomic()) { - // Only constant offset - return (Const(0), constantOffset); + inArgumentsCount = 3; } - if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add) + EmitterContext context = new EmitterContext(); + + Operand globalAddressLow = Argument(0); + Operand globalAddressHigh = Argument(1); + + foreach (uint targetCb in targetCbs) { - return (null, 0); + (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb); + + Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset); + Operand baseAddrHigh = Cbuf(sbCbSlot, sbCbOffset + 1); + Operand size = Cbuf(sbCbSlot, sbCbOffset + 2); + + Operand offset = context.ISubtract(globalAddressLow, baseAddrLow); + Operand borrow = context.ICompareLessUnsigned(globalAddressLow, baseAddrLow); + + Operand inRangeLow = context.ICompareLessUnsigned(offset, size); + + Operand addrHighBorrowed = context.IAdd(globalAddressHigh, borrow); + + Operand inRangeHigh = context.ICompareEqual(addrHighBorrowed, baseAddrHigh); + + Operand inRange = context.BitwiseAnd(inRangeLow, inRangeHigh); + + Operand lblSkip = Label(); + context.BranchIfFalse(lblSkip, inRange); + + Operand compare = null; + Operand value = null; + + if (inArgumentsCount == 4) + { + compare = Argument(2); + value = Argument(3); + } + else if (inArgumentsCount == 3) + { + value = Argument(2); + } + + SearchResult result = new SearchResult(sbCbSlot, sbCbOffset); + + int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment(); + + Operand baseAddressMasked = context.BitwiseAnd(baseAddrLow, Const(-alignment)); + Operand hostOffset = context.ISubtract(globalAddressLow, baseAddressMasked); + + if (!TryGenerateStorageOp( + config, + context, + operation.Inst, + operation.StorageKind, + hostOffset, + compare, + value, + result, + out Operand resultValue)) + { + functionId = 0; + return false; + } + + if (resultValue != null) + { + context.Return(resultValue); + } + else + { + context.Return(); + } + + context.MarkLabel(lblSkip); } - Operand src1 = offsetAdd.GetSource(0); - Operand src2 = Utils.FindLastOperation(offsetAdd.GetSource(1), block); + bool returnsValue = operation.Dest != null; - if (IsCbOffset(src2, cbSlot, baseAddressCbOffset)) + if (returnsValue) { - return (src1, constantOffset); + context.Return(Const(0)); } - else if (IsCbOffset(src1, cbSlot, baseAddressCbOffset)) + else { - return (src2, constantOffset); + context.Return(); } - return (null, 0); + string functionName = GetFunctionName(operation, isMultiTarget: true, targetCbs); + + Function function = new Function( + ControlFlowGraph.Create(context.GetOperations()).Blocks, + functionName, + returnsValue, + inArgumentsCount, + 0); + + functionId = gtsContext.AddFunction(operation, isMultiTarget: true, targetCbs, function); + + return true; } - private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address) + private static uint PackCbSlotAndOffset(int cbSlot, int cbOffset) { - if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add) + return (uint)((ushort)cbSlot | ((ushort)cbOffset << 16)); + } + + private static (int, int) UnpackCbSlotAndOffset(uint packed) + { + return ((ushort)packed, (ushort)(packed >> 16)); + } + + private static string GetFunctionName(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs) + { + string name = baseOp.Inst.ToString(); + + name += baseOp.StorageKind switch + { + StorageKind.GlobalMemoryS8 => "S8", + StorageKind.GlobalMemoryS16 => "S16", + StorageKind.GlobalMemoryU8 => "U8", + StorageKind.GlobalMemoryU16 => "U16", + _ => string.Empty + }; + + if (isMultiTarget) { - return (address, 0); + name += "Multi"; } - Operand src1 = offsetAdd.GetSource(0); - Operand src2 = offsetAdd.GetSource(1); - - if (src2.Type != OperandType.Constant) + foreach (uint targetCb in targetCbs) { - return (address, 0); + (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb); + + name += $"_c{sbCbSlot}o{sbCbOffset}"; } - return (src1, src2.Value); + return name; } - private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex) + private static bool TryGenerateStorageOp( + ShaderConfig config, + EmitterContext context, + Instruction inst, + StorageKind storageKind, + Operand offset, + Operand compare, + Operand value, + SearchResult result, + out Operand resultValue) { - Operation operation = (Operation)node.Value; + resultValue = null; + bool isStore = inst.IsAtomic() || inst == Instruction.Store; - Operand GetCbufOffset() + if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) { - Operand addrLow = operation.GetSource(0); - - Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize); + return false; + } - Operand baseAddrTrunc = Local(); + Operand wordOffset = context.ShiftRightU32(offset, Const(2)); - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); + if (inst.IsAtomic()) + { + if (IsSmallInt(storageKind)) + { + throw new NotImplementedException(); + } - Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask); + switch (inst) + { + case Instruction.AtomicAdd: + resultValue = context.AtomicAdd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicAnd: + resultValue = context.AtomicAnd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicCompareAndSwap: + resultValue = context.AtomicCompareAndSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, compare, value); + break; + case Instruction.AtomicMaxS32: + if (config.Options.TargetLanguage == TargetLanguage.Spirv) + { + resultValue = context.AtomicMaxS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + else + { + resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.IMaximumS32(memValue, value); + }); + } + break; + case Instruction.AtomicMaxU32: + resultValue = context.AtomicMaxU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicMinS32: + if (config.Options.TargetLanguage == TargetLanguage.Spirv) + { + resultValue = context.AtomicMinS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + else + { + resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.IMinimumS32(memValue, value); + }); + } + break; + case Instruction.AtomicMinU32: + resultValue = context.AtomicMinU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicOr: + resultValue = context.AtomicOr(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicSwap: + resultValue = context.AtomicSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicXor: + resultValue = context.AtomicXor(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + } + } + else if (inst == Instruction.Store) + { + int bitSize = storageKind switch + { + StorageKind.GlobalMemoryS8 or + StorageKind.GlobalMemoryU8 => 8, + StorageKind.GlobalMemoryS16 or + StorageKind.GlobalMemoryU16 => 16, + _ => 32 + }; - node.List.AddBefore(node, andOp); + if (bitSize < 32) + { + Operand bitOffset = GetBitOffset(context, offset); - Operand byteOffset = Local(); - Operand wordOffset = Local(); + GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.BitfieldInsert(memValue, value, bitOffset, Const(bitSize)); + }); + } + else + { + context.Store(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + } + else + { + value = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset); - Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc); - Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); + if (IsSmallInt(storageKind)) + { + Operand bitOffset = GetBitOffset(context, offset); - node.List.AddBefore(node, subOp); - node.List.AddBefore(node, shrOp); + switch (storageKind) + { + case StorageKind.GlobalMemoryS8: + value = context.ShiftRightS32(value, bitOffset); + value = context.BitfieldExtractS32(value, Const(0), Const(8)); + break; + case StorageKind.GlobalMemoryS16: + value = context.ShiftRightS32(value, bitOffset); + value = context.BitfieldExtractS32(value, Const(0), Const(16)); + break; + case StorageKind.GlobalMemoryU8: + value = context.ShiftRightU32(value, bitOffset); + value = context.BitwiseAnd(value, Const(byte.MaxValue)); + break; + case StorageKind.GlobalMemoryU16: + value = context.ShiftRightU32(value, bitOffset); + value = context.BitwiseAnd(value, Const(ushort.MaxValue)); + break; + } + } - return wordOffset; + resultValue = value; } - Operand cbufOffset = GetCbufOffset(); - Operand vecIndex = Local(); - Operand elemIndex = Local(); - - node.List.AddBefore(node, new Operation(Instruction.ShiftRightU32, 0, vecIndex, cbufOffset, Const(2))); - node.List.AddBefore(node, new Operation(Instruction.BitwiseAnd, 0, elemIndex, cbufOffset, Const(3))); + return true; + } - Operand[] sources = new Operand[4]; + private static Operand GetBitOffset(EmitterContext context, Operand offset) + { + return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3)); + } - int cbSlot = UbeFirstCbuf + storageIndex; + private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func<Operand, Operand> opCallback) + { + Operand lblLoopHead = Label(); - sources[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot)); - sources[1] = Const(0); - sources[2] = vecIndex; - sources[3] = elemIndex; + context.MarkLabel(lblLoopHead); - Operation ldcOp = new Operation(Instruction.Load, StorageKind.ConstantBuffer, operation.Dest, sources); + Operand oldValue = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset); + Operand newValue = opCallback(oldValue); - for (int index = 0; index < operation.SourcesCount; index++) - { - operation.SetSource(index, null); - } + Operand casResult = context.AtomicCompareAndSwap( + StorageKind.StorageBuffer, + binding, + Const(0), + wordOffset, + oldValue, + newValue); - LinkedListNode<INode> oldNode = node; + Operand casFail = context.ICompareNotEqual(casResult, oldValue); - node = node.List.AddBefore(node, ldcOp); + context.BranchIfTrue(lblLoopHead, casFail); - node.List.Remove(oldNode); - - return node; + return oldValue; } - private static SearchResult SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress) + private static SearchResult FindUniqueBaseAddressCb(GtsContext gtsContext, BasicBlock block, Operand globalAddress, bool needsOffset) { globalAddress = Utils.FindLastOperation(globalAddress, block); if (globalAddress.Type == OperandType.ConstantBuffer) { - return GetStorageIndex(config, globalAddress); + return GetBaseAddressCbWithOffset(globalAddress, Const(0), 0); } Operation operation = globalAddress.AsgOp as Operation; if (operation == null || operation.Inst != Instruction.Add) { - return SearchResult.NotFound; + return FindBaseAddressCbFromMemory(gtsContext, operation, 0, needsOffset); } Operand src1 = operation.GetSource(0); Operand src2 = operation.GetSource(1); + int constOffset = 0; + if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) || (src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant)) { Operand baseAddr; + Operand offset; if (src1.Type == OperandType.LocalVariable) { baseAddr = Utils.FindLastOperation(src1, block); + offset = src2; } else { baseAddr = Utils.FindLastOperation(src2, block); + offset = src1; } - var result = GetStorageIndex(config, baseAddr); + var result = GetBaseAddressCbWithOffset(baseAddr, offset, 0); if (result.Found) { return result; } + constOffset = offset.Value; operation = baseAddr.AsgOp as Operation; if (operation == null || operation.Inst != Instruction.Add) { - return SearchResult.NotFound; + return FindBaseAddressCbFromMemory(gtsContext, operation, constOffset, needsOffset); } } - var selectedResult = SearchResult.NotFound; + src1 = operation.GetSource(0); + src2 = operation.GetSource(1); + + // If we have two possible results, we give preference to the ones from + // the driver reserved constant buffer, as those are the ones that + // contains the base address. - for (int index = 0; index < operation.SourcesCount; index++) + // If both are constant buffer, give preference to the second operand, + // because constant buffer are always encoded as the second operand, + // so the second operand will always be the one from the last instruction. + + if (src1.Type != OperandType.ConstantBuffer || + (src1.Type == OperandType.ConstantBuffer && src2.Type == OperandType.ConstantBuffer) || + (src2.Type == OperandType.ConstantBuffer && src2.GetCbufSlot() == DriverReservedCb)) { - Operand source = operation.GetSource(index); + return GetBaseAddressCbWithOffset(src2, src1, constOffset); + } - var result = GetStorageIndex(config, source); + return GetBaseAddressCbWithOffset(src1, src2, constOffset); + } - // If we already have a result, we give preference to the ones from - // the driver reserved constant buffer, as those are the ones that - // contains the base address. - if (result.Found && (!selectedResult.Found || result.SbCbSlot == GlobalMemory.DriverReservedCb)) + private static SearchResult FindBaseAddressCbFromMemory(GtsContext gtsContext, Operation operation, int constOffset, bool needsOffset) + { + if (operation != null) + { + if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand bo, out int co) && + gtsContext.TryGetMemoryTargetCb(type, bo, co, out SearchResult result) && + (result.Offset != null || !needsOffset)) { - selectedResult = result; + if (constOffset != 0) + { + return new SearchResult( + result.SbCbSlot, + result.SbCbOffset, + result.Offset, + result.ConstOffset + constOffset); + } + + return result; } } - return selectedResult; + return SearchResult.NotFound; } - private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand) + private static SearchResult GetBaseAddressCbWithOffset(Operand baseAddress, Operand offset, int constOffset) { - if (operand.Type == OperandType.ConstantBuffer) + if (baseAddress.Type == OperandType.ConstantBuffer) { - int slot = operand.GetCbufSlot(); - int offset = operand.GetCbufOffset(); + int sbCbSlot = baseAddress.GetCbufSlot(); + int sbCbOffset = baseAddress.GetCbufOffset(); - if ((offset & 3) == 0) + // We require the offset to be aligned to 1 word (64 bits), + // since the address size is 64-bit and the GPU only supports aligned memory access. + if ((sbCbOffset & 1) == 0) { - return new SearchResult(slot, offset); + return new SearchResult(sbCbSlot, sbCbOffset, offset, constOffset); } } return SearchResult.NotFound; } - private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd) + private static bool TryGetMemoryOffsets(Operation operation, out LsMemoryType type, out Operand baseOffset, out int constOffset) { - if (operand.Type == OperandType.ConstantBuffer) + baseOffset = null; + + if (operation.Inst == Instruction.LoadShared || operation.Inst == Instruction.StoreShared) + { + type = LsMemoryType.Shared; + return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset); + } + else if (operation.Inst == Instruction.LoadLocal || operation.Inst == Instruction.StoreLocal) { - int slot = operand.GetCbufSlot(); - int offset = operand.GetCbufOffset(); + type = LsMemoryType.Local; + return TryGetLocalMemoryOffset(operation, out constOffset); + } - if (slot == 0 && offset >= sbStart && offset < sbEnd) - { - int storageIndex = (offset - sbStart) / StorageDescSize; + type = default; + constOffset = 0; + return false; + } + + private static bool TryGetSharedMemoryOffsets(Operation operation, out Operand baseOffset, out int constOffset) + { + baseOffset = null; + constOffset = 0; + + // The byte offset is right shifted by 2 to get the 32-bit word offset, + // so we want to get the byte offset back, since each one of those word + // offsets are a new "local variable" which will not match. - return storageIndex; + if (operation.GetSource(0).AsgOp is Operation shiftRightOp && + shiftRightOp.Inst == Instruction.ShiftRightU32 && + shiftRightOp.GetSource(1).Type == OperandType.Constant && + shiftRightOp.GetSource(1).Value == 2) + { + baseOffset = shiftRightOp.GetSource(0); + } + + // Check if we have a constant offset being added to the base offset. + + if (baseOffset?.AsgOp is Operation addOp && addOp.Inst == Instruction.Add) + { + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable) + { + constOffset = src1.Value; + baseOffset = src2; } + else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) + { + baseOffset = src1; + constOffset = src2.Value; + } + } + + return baseOffset != null && baseOffset.Type == OperandType.LocalVariable; + } + + private static bool TryGetLocalMemoryOffset(Operation operation, out int constOffset) + { + if (operation.GetSource(0).Type == OperandType.Constant) + { + constOffset = operation.GetSource(0).Value; + return true; } - return -1; + constOffset = 0; + return false; } } }
\ No newline at end of file |