aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs')
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs765
1 files changed, 765 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
new file mode 100644
index 00000000..c619b9bb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
@@ -0,0 +1,765 @@
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.CompilerServices;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ static class Decoder
+ {
+ /// <summary>
+ /// Decodes the shader code into functions made of basic blocks, starting with the function at address 0.
+ /// </summary>
+ /// <param name="config">Shader configuration, forwarded to the block filling and BRX target discovery steps</param>
+ /// <param name="startAddress">Base address that is added to the block addresses when the code is read</param>
+ /// <returns>The decoded program, holding the main function and every function reached through a CAL</returns>
+ public static DecodedProgram Decode(ShaderConfig config, ulong startAddress)
+ {
+     Dictionary<ulong, DecodedFunction> knownFunctions = new Dictionary<ulong, DecodedFunction>();
+     Queue<DecodedFunction> pendingFunctions = new Queue<DecodedFunction>();
+
+     // Returns the function registered for the address, creating it and queueing it for decoding on first use.
+     DecodedFunction EnqueueFunction(ulong address)
+     {
+         if (knownFunctions.TryGetValue(address, out DecodedFunction known))
+         {
+             return known;
+         }
+
+         DecodedFunction created = new DecodedFunction(address);
+
+         knownFunctions.Add(address, created);
+         pendingFunctions.Enqueue(created);
+
+         return created;
+     }
+
+     DecodedFunction mainFunction = EnqueueFunction(0);
+
+     while (pendingFunctions.TryDequeue(out DecodedFunction function))
+     {
+         List<Block> sortedBlocks = new List<Block>();
+         Queue<Block> pendingBlocks = new Queue<Block>();
+         Dictionary<ulong, Block> blocksByAddress = new Dictionary<ulong, Block>();
+
+         // Returns the block registered for the address, creating it and queueing it for decoding on first use.
+         Block GetBlock(ulong blkAddress)
+         {
+             if (blocksByAddress.TryGetValue(blkAddress, out Block existing))
+             {
+                 return existing;
+             }
+
+             Block fresh = new Block(blkAddress);
+
+             pendingBlocks.Enqueue(fresh);
+             blocksByAddress.Add(blkAddress, fresh);
+
+             return fresh;
+         }
+
+         GetBlock(function.Address);
+
+         bool foundNewTargets;
+
+         do
+         {
+             while (pendingBlocks.TryDequeue(out Block current))
+             {
+                 // An address that falls inside an already decoded block needs no decoding,
+                 // the existing block is split at that address instead.
+                 if (BinarySearch(sortedBlocks, current.Address, out int nearIndex))
+                 {
+                     Block container = sortedBlocks[nearIndex];
+
+                     if (container.Address == current.Address)
+                     {
+                         throw new InvalidOperationException("Found duplicate block address on the list.");
+                     }
+
+                     container.Split(current);
+                     sortedBlocks.Insert(nearIndex + 1, current);
+
+                     continue;
+                 }
+
+                 // Decoding must stop at the start of the closest block located after the new one, if any.
+                 ulong limitAddress = ulong.MaxValue;
+
+                 if (nearIndex != sortedBlocks.Count)
+                 {
+                     Block neighbor = sortedBlocks[nearIndex];
+
+                     if (neighbor.Address > current.Address)
+                     {
+                         limitAddress = neighbor.Address;
+                     }
+                     else if (neighbor.Address < current.Address && nearIndex + 1 < sortedBlocks.Count)
+                     {
+                         limitAddress = sortedBlocks[nearIndex + 1].Address;
+                     }
+                 }
+
+                 FillBlock(config, current, limitAddress, startAddress);
+
+                 if (current.OpCodes.Count != 0)
+                 {
+                     // Every possible branch target needs a block,
+                     // and that includes the targets of PBK/PCNT/SSY instructions.
+                     foreach (PushOpInfo pushOp in current.PushOpCodes)
+                     {
+                         GetBlock(pushOp.Op.GetAbsoluteAddress());
+                     }
+
+                     InstOp lastOp = current.GetLastOp();
+
+                     switch (lastOp.Name)
+                     {
+                         case InstName.Cal:
+                         {
+                             // The call target is decoded later as a separate function.
+                             EnqueueFunction(lastOp.GetAbsoluteAddress()).AddCaller(function);
+                             break;
+                         }
+                         case InstName.Bra:
+                         {
+                             Block branchTarget = GetBlock(lastOp.GetAbsoluteAddress());
+                             current.Successors.Add(branchTarget);
+                             branchTarget.Predecessors.Add(current);
+                             break;
+                         }
+                     }
+
+                     // Unless the block ends with an unconditional branch, execution may fall through
+                     // to the block at the end address. That block always goes first on the successors list.
+                     if (!IsUnconditionalBranch(ref lastOp))
+                     {
+                         Block fallThrough = GetBlock(current.EndAddress);
+                         current.Successors.Insert(0, fallThrough);
+                         fallThrough.Predecessors.Add(current);
+                     }
+                 }
+
+                 // Keep the list sorted by address when adding the new block.
+                 if (sortedBlocks.Count == 0)
+                 {
+                     sortedBlocks.Add(current);
+                 }
+                 else
+                 {
+                     Block neighbor = sortedBlocks[nearIndex];
+                     int insertIndex = nearIndex;
+
+                     if (neighbor.Address < current.Address)
+                     {
+                         insertIndex++;
+                     }
+
+                     sortedBlocks.Insert(insertIndex, current);
+                 }
+             }
+
+             // Connect the PBK/PCNT/SSY addresses to the instructions that consume them (BRK/CONT/SYNC).
+             foreach (Block block in sortedBlocks)
+             {
+                 if (block.PushOpCodes.Count == 0)
+                 {
+                     continue;
+                 }
+
+                 for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
+                 {
+                     PropagatePushOp(blocksByAddress, block, pushOpIndex);
+                 }
+             }
+
+             // Look for the targets of BRX (indirect branch) instructions.
+             // Newly found targets were queued by GetBlock, so another decoding round is needed for them,
+             // which may in turn reveal more push targets to propagate and more BRX instructions.
+             foundNewTargets = FindBrxTargets(config, sortedBlocks, GetBlock);
+         }
+         while (foundNewTargets);
+
+         function.SetBlocks(sortedBlocks.ToArray());
+     }
+
+     return new DecodedProgram(mainFunction, knownFunctions);
+ }
+
+ /// <summary>
+ /// Looks for the block whose address range (start inclusive, end exclusive) contains the given address.
+ /// </summary>
+ /// <param name="blocks">Blocks to search; the search assumes they are sorted by address</param>
+ /// <param name="address">Address to look for</param>
+ /// <param name="index">Index of the last block that was probed, or zero if the list is empty</param>
+ /// <returns>True if a block containing the address was found, false otherwise</returns>
+ private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
+ {
+     index = 0;
+
+     int low = 0;
+     int high = blocks.Count - 1;
+
+     while (low <= high)
+     {
+         int middle = low + ((high - low) >> 1);
+         Block candidate = blocks[middle];
+
+         // The caller relies on the last probed index even when nothing is found.
+         index = middle;
+
+         if (address < candidate.Address)
+         {
+             high = middle - 1;
+         }
+         else if (address < candidate.EndAddress)
+         {
+             return true;
+         }
+         else
+         {
+             low = middle + 1;
+         }
+     }
+
+     return false;
+ }
+
+ /// <summary>
+ /// Decodes instructions from the block address onwards and appends them to the block, stopping after
+ /// an instruction flagged as a branch or when the limit address is reached, then sets the block end address.
+ /// </summary>
+ /// <param name="config">Shader configuration, provides the GPU accessor and receives the usage flags</param>
+ /// <param name="block">Block to be filled</param>
+ /// <param name="limitAddress">Address where decoding must stop even if no branch was found</param>
+ /// <param name="startAddress">Base address added to the current address when the code is read</param>
+ private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress)
+ {
+     IGpuAccessor gpuAccessor = config.GpuAccessor;
+
+     ReadOnlySpan<ulong> code = ReadOnlySpan<ulong>.Empty;
+     int codeIndex = 0;
+     ulong address = block.Address;
+
+     InstOp op = default;
+     bool endsWithBranch = false;
+
+     // A whole 8 bytes instruction must fit before the limit address.
+     while (!endsWithBranch && address + 7 < limitAddress)
+     {
+         if ((address & 0x1f) != 0)
+         {
+             // Ask for more code once everything on the current buffer was consumed.
+             if (codeIndex >= code.Length)
+             {
+                 code = gpuAccessor.GetCode(startAddress + address, 8);
+                 codeIndex = 0;
+             }
+
+             ulong opCode = code[codeIndex++];
+
+             op = InstTable.GetOp(address, opCode);
+
+             if (op.Props.HasFlag(InstProps.TexB))
+             {
+                 config.SetUsedFeature(FeatureFlags.Bindless);
+             }
+
+             switch (op.Name)
+             {
+                 case InstName.Ald:
+                 case InstName.Ast:
+                 case InstName.Ipa:
+                     SetUserAttributeUses(config, op.Name, opCode);
+                     break;
+                 case InstName.Pbk:
+                 case InstName.Pcnt:
+                 case InstName.Ssy:
+                     block.AddPushOp(op);
+                     break;
+             }
+
+             block.OpCodes.Add(op);
+         }
+         else
+         {
+             // Scheduling instructions are written every 32 bytes and are skipped,
+             // the buffer position still has to move past them.
+             codeIndex++;
+         }
+
+         address += 8;
+
+         // After a skipped word this tests the previously decoded instruction (or the default one),
+         // exactly like the loop condition of a do/while would.
+         endsWithBranch = op.Props.HasFlag(InstProps.Bra);
+     }
+
+     block.EndAddress = address;
+ }
+
+ /// <summary>
+ /// Records on the shader configuration which attributes are read or written by an attribute instruction.
+ /// </summary>
+ /// <param name="config">Shader configuration that receives the attribute usage information</param>
+ /// <param name="name">Instruction name; AST and ALD are handled explicitly, anything else is decoded as IPA</param>
+ /// <param name="opCode">Raw encoding of the instruction</param>
+ private static void SetUserAttributeUses(ShaderConfig config, InstName name, ulong opCode)
+ {
+     int offset;
+     int count = 1;
+     bool isStore = false;
+     bool indexed = false;
+     bool perPatch = false;
+
+     switch (name)
+     {
+         case InstName.Ast:
+         {
+             InstAst opAst = new InstAst(opCode);
+             count = (int)opAst.AlSize + 1;
+             offset = opAst.Imm11;
+             indexed = opAst.Phys;
+             perPatch = opAst.P;
+             isStore = true;
+             break;
+         }
+         case InstName.Ald:
+         {
+             InstAld opAld = new InstAld(opCode);
+             count = (int)opAld.AlSize + 1;
+             offset = opAld.Imm11;
+             indexed = opAld.Phys;
+             perPatch = opAld.P;
+             isStore = opAld.O;
+             break;
+         }
+         default: /* InstName.Ipa */
+         {
+             InstIpa opIpa = new InstIpa(opCode);
+             offset = opIpa.Imm10;
+             indexed = opIpa.Idx;
+             break;
+         }
+     }
+
+     if (indexed)
+     {
+         // With indexing the accessed attribute is not taken from the immediate offset,
+         // so all user attributes of the accessed direction are flagged.
+         if (isStore)
+         {
+             config.SetAllOutputUserAttributes();
+             config.SetUsedFeature(FeatureFlags.OaIndexing);
+         }
+         else
+         {
+             config.SetAllInputUserAttributes();
+             config.SetUsedFeature(FeatureFlags.IaIndexing);
+         }
+
+         return;
+     }
+
+     // Each accessed element is 4 bytes wide.
+     int endOffset = offset + count * 4;
+
+     for (int attr = offset; attr < endOffset; attr += 4)
+     {
+         bool isPerPatchUserAttr = perPatch &&
+             attr >= AttributeConsts.UserAttributePerPatchBase &&
+             attr < AttributeConsts.UserAttributePerPatchEnd;
+
+         bool isUserAttr = !perPatch &&
+             attr >= AttributeConsts.UserAttributeBase &&
+             attr < AttributeConsts.UserAttributeEnd;
+
+         if (isPerPatchUserAttr)
+         {
+             int userAttr = attr - AttributeConsts.UserAttributePerPatchBase;
+             int index = userAttr / 16;
+
+             if (isStore)
+             {
+                 config.SetOutputUserAttributePerPatch(index);
+             }
+             else
+             {
+                 config.SetInputUserAttributePerPatch(index);
+             }
+         }
+         else if (isUserAttr)
+         {
+             int userAttr = attr - AttributeConsts.UserAttributeBase;
+             int index = userAttr / 16;
+
+             if (isStore)
+             {
+                 config.SetOutputUserAttribute(index);
+             }
+             else
+             {
+                 config.SetInputUserAttribute(index, (userAttr >> 2) & 3);
+             }
+         }
+
+         if (isStore)
+         {
+             continue;
+         }
+
+         // Loads of fog coordinate, front/back colors or texture coordinates use fixed function attributes.
+         bool isFixedFuncAttr =
+             attr == AttributeConsts.FogCoord ||
+             (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0) ||
+             (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd);
+
+         if (isFixedFuncAttr)
+         {
+             config.SetUsedFeature(FeatureFlags.FixedFuncAttr);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Checks if the instruction is flagged as a branch and always executes.
+ /// </summary>
+ /// <param name="op">Instruction to check</param>
+ /// <returns>True if the instruction is an unconditional branch, false otherwise</returns>
+ public static bool IsUnconditionalBranch(ref InstOp op)
+ {
+     if (!IsUnconditional(ref op))
+     {
+         return false;
+     }
+
+     return op.Props.HasFlag(InstProps.Bra);
+ }
+
+ /// <summary>
+ /// Checks if the instruction always executes: its predicate must be the non-inverted "true" predicate,
+ /// and for BRA and EXIT the condition code must additionally be T.
+ /// </summary>
+ /// <param name="op">Instruction to check</param>
+ /// <returns>True if the instruction is not conditional, false otherwise</returns>
+ private static bool IsUnconditional(ref InstOp op)
+ {
+     InstConditional condOp = new InstConditional(op.RawOpCode);
+
+     // The condition code is only taken into account for BRA and EXIT.
+     bool checksConditionCode = op.Name == InstName.Bra || op.Name == InstName.Exit;
+     bool conditionCodePasses = !checksConditionCode || condOp.Ccc == Ccc.T;
+
+     return conditionCodePasses && condOp.Pred == RegisterConsts.PredicateTrueIndex && !condOp.PredInv;
+ }
+
+ /// <summary>
+ /// Tries to resolve the targets of the BRX (indirect branch) instructions that end the given blocks,
+ /// reading the jump offsets from constant buffer 1, and links the blocks to the targets that were found.
+ /// </summary>
+ /// <param name="config">Shader configuration used to read the constant buffer</param>
+ /// <param name="blocks">Blocks to inspect</param>
+ /// <param name="getBlock">Function that returns the block for a given address</param>
+ /// <returns>True if a jump table was found for at least one BRX instruction, false otherwise</returns>
+ private static bool FindBrxTargets(ShaderConfig config, IEnumerable<Block> blocks, Func<ulong, Block> getBlock)
+ {
+     bool hasNewTarget = false;
+
+     foreach (Block block in blocks)
+     {
+         InstOp lastOp = block.GetLastOp();
+         int unresolvedSuccessorsCount = block.HasNext() ? 1 : 0;
+
+         // Only BRX instructions matter here, and those that already have
+         // more successors than the fall-through block were resolved before.
+         if (lastOp.Name != InstName.Brx || block.Successors.Count != unresolvedSuccessorsCount)
+         {
+             continue;
+         }
+
+         HashSet<ulong> seenTargets = new HashSet<ulong>();
+
+         InstBrx opBrx = new InstBrx(lastOp.RawOpCode);
+         ulong baseAddress = lastOp.GetAbsoluteAddress();
+
+         // An indirect branch could go anywhere,
+         // so the possible target offsets are looked up on the constant buffer.
+         (int tableOffset, int tableLength) = FindBrxTargetRange(block, opBrx.SrcA);
+
+         hasNewTarget |= tableLength != 0;
+
+         for (int entry = 0; entry < tableLength; entry++)
+         {
+             uint targetOffset = config.ConstantBuffer1Read(tableOffset + entry * 4);
+             ulong targetAddress = baseAddress + targetOffset;
+
+             // The same target may show up more than once on the table, link it only once.
+             if (!seenTargets.Add(targetAddress))
+             {
+                 continue;
+             }
+
+             Block target = getBlock(targetAddress);
+             target.Predecessors.Add(block);
+             block.Successors.Add(target);
+         }
+     }
+
+     return hasNewTarget;
+ }
+
+ /// <summary>
+ /// Tries to find the constant buffer range that holds the jump offsets used by a BRX instruction.
+ /// </summary>
+ /// <param name="block">Block that ends with the BRX instruction</param>
+ /// <param name="brxReg">Index of the register that holds the BRX offset</param>
+ /// <returns>Byte offset of the table and number of entries, or (0, 0) if the pattern was not matched</returns>
+ private static (int, int) FindBrxTargetRange(Block block, int brxReg)
+ {
+     // The code below walks backwards from the BRX looking for this sequence:
+     //
+     // IMNMX.U32 Rx, Rx, UpperBound, PT
+     // SHL Rx, Rx, 0x2
+     // LDC Rx, c[0x1][Rx+BaseOffset]
+     //
+     // Rx is an arbitrary register, while "UpperBound" and "BaseOffset" are constants.
+     // The compiler is assumed to emit this sequence before BRX, since the instruction is usually
+     // used to implement jump tables for switch statement optimizations.
+     // When it matches, "BaseOffset" is the offset in bytes of the jump offsets on the constant buffer,
+     // and "UpperBound" is the total number of offsets for the BRX, minus 1.
+
+     HashSet<Block> visited = new HashSet<Block>();
+
+     BlockLocation brxLocation = new BlockLocation(block, block.OpCodes.Count - 1);
+
+     // The register used by BRX must come from a LDC...
+     BlockLocation ldcLocation = FindFirstRegWrite(visited, brxLocation, brxReg);
+
+     bool foundLdc = ldcLocation.Block != null &&
+         ldcLocation.Block.OpCodes[ldcLocation.Index].Name == InstName.Ldc;
+
+     if (!foundLdc)
+     {
+         return (0, 0);
+     }
+
+     GetOp<InstLdc>(ldcLocation, out InstLdc opLdc);
+
+     // ... that reads constant buffer 1 using address mode 0.
+     bool readsJumpTable = opLdc.CbufSlot == 1 && opLdc.AddressMode == 0;
+
+     if (!readsJumpTable)
+     {
+         return (0, 0);
+     }
+
+     // The LDC address register must come from a SHL with an immediate shift of 2.
+     BlockLocation shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.SrcA);
+
+     if (shlLocation.Block == null || !shlLocation.IsImmInst(InstName.Shl))
+     {
+         return (0, 0);
+     }
+
+     GetOp<InstShlI>(shlLocation, out InstShlI opShl);
+
+     if (opShl.Imm20 != 2)
+     {
+         return (0, 0);
+     }
+
+     // The shifted register must come from an unsigned IMNMX with an immediate and the PT predicate.
+     BlockLocation imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.SrcA);
+
+     if (imnmxLocation.Block == null || !imnmxLocation.IsImmInst(InstName.Imnmx))
+     {
+         return (0, 0);
+     }
+
+     GetOp<InstImnmxI>(imnmxLocation, out InstImnmxI opImnmx);
+
+     bool usesTruePredicate = opImnmx.SrcPred == RegisterConsts.PredicateTrueIndex && !opImnmx.SrcPredInv;
+
+     if (opImnmx.Signed || !usesTruePredicate)
+     {
+         return (0, 0);
+     }
+
+     return (opLdc.CbufOffset, opImnmx.Imm20 + 1);
+ }
+
+ /// <summary>
+ /// Gets the instruction at the given location, with its raw 64-bit encoding reinterpreted as <typeparamref name="T"/>.
+ /// </summary>
+ /// <typeparam name="T">Type that the raw instruction encoding is reinterpreted as</typeparam>
+ /// <param name="location">Location of the instruction</param>
+ /// <param name="op">Instruction encoding viewed as <typeparamref name="T"/></param>
+ private static void GetOp<T>(BlockLocation location, out T op) where T : unmanaged
+ {
+     InstOp instOp = location.Block.OpCodes[location.Index];
+     ulong encoding = instOp.RawOpCode;
+
+     op = Unsafe.As<ulong, T>(ref encoding);
+ }
+
+ /// <summary>
+ /// Position of an instruction, given by a block and an index on the block instruction list.
+ /// </summary>
+ private readonly struct BlockLocation
+ {
+     public Block Block { get; }
+     public int Index { get; }
+
+     /// <summary>
+     /// Creates a new location.
+     /// </summary>
+     /// <param name="block">Block where the instruction is located</param>
+     /// <param name="index">Index of the instruction inside the block</param>
+     public BlockLocation(Block block, int index)
+     {
+         Block = block;
+         Index = index;
+     }
+
+     /// <summary>
+     /// Checks if the instruction at this location has the given name and carries the Ib property flag.
+     /// </summary>
+     /// <param name="name">Expected instruction name</param>
+     /// <returns>True if both the name and the flag match, false otherwise</returns>
+     public bool IsImmInst(InstName name)
+     {
+         InstOp candidate = Block.OpCodes[Index];
+
+         if (candidate.Name != name)
+         {
+             return false;
+         }
+
+         return candidate.Props.HasFlag(InstProps.Ib);
+     }
+ }
+
+ /// <summary>
+ /// Searches backwards, starting right before the given location, for an instruction that writes
+ /// to the register. When the start of a block is reached, the search goes on from the end
+ /// of the predecessors that were not visited yet.
+ /// </summary>
+ /// <param name="visited">Set of blocks that were already visited; the blocks visited here are added to it</param>
+ /// <param name="location">Location where the search starts, the instruction at the location itself is not checked</param>
+ /// <param name="regIndex">Index of the register to look for</param>
+ /// <returns>Location of the write that was found, or a location with a null block if there is none</returns>
+ private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
+ {
+     Queue<BlockLocation> pending = new Queue<BlockLocation>();
+
+     pending.Enqueue(location);
+     visited.Add(location.Block);
+
+     while (pending.TryDequeue(out BlockLocation start))
+     {
+         Block block = start.Block;
+         int opIndex = start.Index;
+
+         while (--opIndex >= 0)
+         {
+             if (WritesToRegister(block.OpCodes[opIndex], regIndex))
+             {
+                 return new BlockLocation(block, opIndex);
+             }
+         }
+
+         foreach (Block predecessor in block.Predecessors)
+         {
+             if (!visited.Add(predecessor))
+             {
+                 continue;
+             }
+
+             // Start past the last instruction, so that all instructions of the predecessor are checked.
+             pending.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count));
+         }
+     }
+
+     return new BlockLocation(null, 0);
+ }
+
+ /// <summary>
+ /// Checks if the instruction writes to the given register.
+ /// </summary>
+ /// <param name="op">Instruction to check</param>
+ /// <param name="regIndex">Index of the register</param>
+ /// <returns>True if one of the destination fields of the instruction matches the register, false otherwise</returns>
+ private static bool WritesToRegister(InstOp op, int regIndex)
+ {
+     InstProps destProps = op.Props & (InstProps.Rd | InstProps.Rd2);
+
+     // Instructions without a register destination (those that only write predicates, for example)
+     // must not have their encoding checked.
+     if (destProps == 0)
+     {
+         return false;
+     }
+
+     ulong encoding = op.RawOpCode;
+
+     // The second destination is compared against the 8 bits starting at bit 28,
+     // while the low 8 bits of the encoding are compared in all cases.
+     bool matchesSecondDest = op.Props.HasFlag(InstProps.Rd2) && (byte)(encoding >> 28) == regIndex;
+
+     return matchesSecondDest || (byte)encoding == regIndex;
+ }
+
+ private enum MergeType // Kind of an address on the simulated branch stack; a pop instruction only consumes an address of its own kind.
+ {
+ Brk, // Address pushed by PBK and consumed by BRK.
+ Cont, // Address pushed by PCNT and consumed by CONT.
+ Sync // Address pushed by SSY and consumed by SYNC.
+ }
+
+ /// <summary>
+ /// Entry of the LIFO work list used by <see cref="PropagatePushOp"/>. An entry is either a block
+ /// to visit, or a restore operation (with a null block) that undoes a change made to the branch stack.
+ /// Instances are never modified after construction, so the struct is declared as readonly.
+ /// </summary>
+ private readonly struct PathBlockState
+ {
+     public Block Block { get; }
+
+     private enum RestoreType
+     {
+         None,
+         PopPushOp,
+         PushBranchOp
+     }
+
+     private readonly RestoreType _restoreType;
+
+     // Old stack size for PopPushOp, or the address to push back for PushBranchOp.
+     private readonly ulong _restoreValue;
+     private readonly MergeType _restoreMergeType;
+
+     public bool ReturningFromVisit => _restoreType != RestoreType.None;
+
+     /// <summary>
+     /// Creates an entry that requests a visit to a block.
+     /// </summary>
+     /// <param name="block">Block to be visited</param>
+     public PathBlockState(Block block)
+     {
+         Block = block;
+         _restoreType = RestoreType.None;
+         _restoreValue = 0;
+         _restoreMergeType = default;
+     }
+
+     /// <summary>
+     /// Creates a restore entry that pops the branch stack back to a previous size.
+     /// </summary>
+     /// <param name="oldStackSize">Size that the branch stack must be shrunk to</param>
+     public PathBlockState(int oldStackSize)
+     {
+         Block = null;
+         _restoreType = RestoreType.PopPushOp;
+         _restoreValue = (ulong)oldStackSize;
+         _restoreMergeType = default;
+     }
+
+     /// <summary>
+     /// Creates a restore entry that pushes an address back into the branch stack.
+     /// </summary>
+     /// <param name="syncAddress">Address to be pushed back</param>
+     /// <param name="mergeType">Kind of the address to be pushed back</param>
+     public PathBlockState(ulong syncAddress, MergeType mergeType)
+     {
+         Block = null;
+         _restoreType = RestoreType.PushBranchOp;
+         _restoreValue = syncAddress;
+         _restoreMergeType = mergeType;
+     }
+
+     /// <summary>
+     /// Applies the restore operation of this entry to the branch stack.
+     /// Does nothing for entries that request a block visit.
+     /// </summary>
+     /// <param name="branchStack">Branch stack to be restored</param>
+     public void RestoreStackState(Stack<(ulong, MergeType)> branchStack)
+     {
+         if (_restoreType == RestoreType.PushBranchOp)
+         {
+             branchStack.Push((_restoreValue, _restoreMergeType));
+         }
+         else if (_restoreType == RestoreType.PopPushOp)
+         {
+             while (branchStack.Count > (uint)_restoreValue)
+             {
+                 branchStack.Pop();
+             }
+         }
+     }
+ }
+
+ /// <summary>
+ /// Follows the paths that start at a push instruction (PBK/PCNT/SSY) while simulating the branch stack,
+ /// and links every pop instruction (BRK/CONT/SYNC) that consumes the pushed address to the push target.
+ /// </summary>
+ /// <param name="blocks">Blocks of the function, indexed by address</param>
+ /// <param name="currBlock">Block that contains the push instruction</param>
+ /// <param name="pushOpIndex">Index of the push instruction on the push instruction list of the block</param>
+ private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
+ {
+     PushOpInfo originInfo = currBlock.PushOpCodes[pushOpIndex];
+     InstOp originOp = originInfo.Op;
+     Block originTarget = blocks[originOp.GetAbsoluteAddress()];
+
+     HashSet<Block> visited = new HashSet<Block>();
+     Stack<PathBlockState> pending = new Stack<PathBlockState>();
+     Stack<(ulong, MergeType)> branchStack = new Stack<(ulong, MergeType)>();
+
+     void Push(PathBlockState entry)
+     {
+         // Entries with a null block are restore operations. They undo the work done inside a block
+         // when we return from it: they pop the addresses pushed by SSY/PBK instructions inside
+         // the block, and push back the addresses popped by SYNC/BRK. Those are always accepted.
+         // Blocks that were already visited are dropped, to avoid going around in circles.
+         if (entry.Block != null && visited.Contains(entry.Block))
+         {
+             return;
+         }
+
+         pending.Push(entry);
+     }
+
+     // On the first block, the push instructions that come before the one being propagated are skipped.
+     // Every other block has all of its push instructions simulated.
+     int firstPushOp = pushOpIndex;
+
+     Push(new PathBlockState(currBlock));
+
+     while (pending.TryPop(out PathBlockState state))
+     {
+         if (state.ReturningFromVisit)
+         {
+             state.RestoreStackState(branchStack);
+
+             continue;
+         }
+
+         Block current = state.Block;
+
+         // A block that was already processed must be ignored, otherwise its child blocks
+         // would be pushed over and over again until memory is exhausted.
+         if (!visited.Add(current))
+         {
+             continue;
+         }
+
+         int pushOpsCount = current.PushOpCodes.Count;
+
+         if (pushOpsCount != 0)
+         {
+             // Queue the removal of the addresses pushed below, for when we return from this block.
+             Push(new PathBlockState(branchStack.Count));
+
+             for (int index = firstPushOp; index < pushOpsCount; index++)
+             {
+                 InstOp nestedPushOp = current.PushOpCodes[index].Op;
+                 MergeType nestedMergeType = GetMergeTypeFromPush(nestedPushOp.Name);
+
+                 branchStack.Push((nestedPushOp.GetAbsoluteAddress(), nestedMergeType));
+             }
+         }
+
+         firstPushOp = 0;
+
+         bool hasNext = current.HasNext();
+
+         if (hasNext)
+         {
+             Push(new PathBlockState(current.Successors[0]));
+         }
+
+         InstOp lastOp = current.GetLastOp();
+
+         if (!IsPopBranch(lastOp.Name))
+         {
+             // The work list is a LIFO, so pushing the successors in descending address order
+             // makes them be processed in ascending address order.
+             foreach (Block successor in current.Successors.OrderByDescending(x => x.Address))
+             {
+                 // The fall-through block was already pushed above.
+                 if (!hasNext || successor != current.Successors[0])
+                 {
+                     Push(new PathBlockState(successor));
+                 }
+             }
+
+             continue;
+         }
+
+         MergeType popMergeType = GetMergeTypeFromPop(lastOp.Name);
+
+         // Pop addresses until one of the kind consumed by this instruction shows up.
+         // The push and pop instruction types must match, so:
+         // - BRK can only consume addresses pushed by PBK.
+         // - CONT can only consume addresses pushed by PCNT.
+         // - SYNC can only consume addresses pushed by SSY.
+         bool matched = false;
+         ulong targetAddress = 0UL;
+
+         while (!matched && branchStack.Count != 0)
+         {
+             MergeType poppedMergeType;
+
+             (targetAddress, poppedMergeType) = branchStack.Pop();
+
+             // Every popped address gets a restore entry, so that it is pushed
+             // back into the PBK/PCNT/SSY stack when we return from this block.
+             Push(new PathBlockState(targetAddress, poppedMergeType));
+
+             matched = poppedMergeType == popMergeType;
+         }
+
+         if (!matched)
+         {
+             continue;
+         }
+
+         if (branchStack.Count != 0)
+         {
+             // The address came from another push instruction, keep following the path from its target.
+             Push(new PathBlockState(blocks[targetAddress]));
+         }
+         else if (current.SyncTargets.TryAdd(originOp.Address, new SyncTarget(originInfo, current.SyncTargets.Count)))
+         {
+             // The entire stack was consumed, which means that the pop instruction
+             // just consumed the address from the push instruction being propagated.
+             originInfo.Consumers.Add(current, Local());
+             originTarget.Predecessors.Add(current);
+             current.Successors.Add(originTarget);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Checks if the instruction is one of the branches that consume a pushed address (BRK, CONT or SYNC).
+ /// </summary>
+ /// <param name="name">Instruction name</param>
+ /// <returns>True for BRK, CONT and SYNC, false for everything else</returns>
+ public static bool IsPopBranch(InstName name)
+ {
+     switch (name)
+     {
+         case InstName.Brk:
+         case InstName.Cont:
+         case InstName.Sync:
+             return true;
+         default:
+             return false;
+     }
+ }
+
+ /// <summary>
+ /// Gets the kind of address that a push instruction places on the branch stack.
+ /// </summary>
+ /// <param name="name">Name of the push instruction</param>
+ /// <returns>Brk for PBK, Cont for PCNT, and Sync for any other name</returns>
+ private static MergeType GetMergeTypeFromPush(InstName name)
+ {
+     if (name == InstName.Pbk)
+     {
+         return MergeType.Brk;
+     }
+
+     if (name == InstName.Pcnt)
+     {
+         return MergeType.Cont;
+     }
+
+     return MergeType.Sync;
+ }
+
+ /// <summary>
+ /// Gets the kind of address that a pop instruction consumes from the branch stack.
+ /// </summary>
+ /// <param name="name">Name of the pop instruction</param>
+ /// <returns>Brk for BRK, Cont for CONT, and Sync for any other name</returns>
+ private static MergeType GetMergeTypeFromPop(InstName name)
+ {
+     if (name == InstName.Brk)
+     {
+         return MergeType.Brk;
+     }
+
+     if (name == InstName.Cont)
+     {
+         return MergeType.Cont;
+     }
+
+     return MergeType.Sync;
+ }
+ }
+} \ No newline at end of file