using Ryujinx.Common.Logging; using Ryujinx.Common.Memory; using Ryujinx.Graphics.Device; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.GPFifo; using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Engine.Types; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Memory.Range; using System; using System.Collections.Generic; namespace Ryujinx.Graphics.Gpu.Engine.MME { /// /// Macro High-level emulation. /// class MacroHLE : IMacroEE { private const int ColorLayerCountOffset = 0x818; private const int ColorStructSize = 0x40; private const int ZetaLayerCountOffset = 0x1230; private const int UniformBufferBindVertexOffset = 0x2410; private const int FirstVertexOffset = 0x1434; private const int IndirectIndexedDataEntrySize = 0x14; private const int LogicOpOffset = 0x19c4; private const int ShaderIdScratchOffset = 0x3470; private const int ShaderAddressScratchOffset = 0x3488; private const int UpdateConstantBufferAddressesBase = 0x34a8; private const int UpdateConstantBufferSizesBase = 0x34bc; private const int UpdateConstantBufferAddressCbu = 0x3460; private readonly GPFifoProcessor _processor; private readonly MacroHLEFunctionName _functionName; /// /// Arguments FIFO. /// public Queue Fifo { get; } /// /// Creates a new instance of the HLE macro handler. /// /// GPU GP FIFO command processor /// Name of the HLE macro function to be called public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName) { _processor = processor; _functionName = functionName; Fifo = new Queue(); } /// /// Executes a macro program until it exits. /// /// Code of the program to execute /// GPU state at the time of the call /// Optional argument passed to the program, 0 if not used public void Execute(ReadOnlySpan code, IDeviceState state, int arg0) { switch (_functionName) { case MacroHLEFunctionName.BindShaderProgram: BindShaderProgram(state, arg0); break; case MacroHLEFunctionName.ClearColor: ClearColor(state, arg0); break; case MacroHLEFunctionName.ClearDepthStencil: ClearDepthStencil(state, arg0); break; case MacroHLEFunctionName.DrawArraysInstanced: DrawArraysInstanced(state, arg0); break; case MacroHLEFunctionName.DrawElements: DrawElements(state, arg0); break; case MacroHLEFunctionName.DrawElementsInstanced: DrawElementsInstanced(state, arg0); break; case MacroHLEFunctionName.DrawElementsIndirect: DrawElementsIndirect(state, arg0); break; case MacroHLEFunctionName.MultiDrawElementsIndirectCount: MultiDrawElementsIndirectCount(state, arg0); break; case MacroHLEFunctionName.UpdateBlendState: UpdateBlendState(state, arg0); break; case MacroHLEFunctionName.UpdateColorMasks: UpdateColorMasks(state, arg0); break; case MacroHLEFunctionName.UpdateUniformBufferState: UpdateUniformBufferState(state, arg0); break; case MacroHLEFunctionName.UpdateUniformBufferStateCbu: UpdateUniformBufferStateCbu(state, arg0); break; case MacroHLEFunctionName.UpdateUniformBufferStateCbuV2: UpdateUniformBufferStateCbuV2(state, arg0); break; default: throw new NotImplementedException(_functionName.ToString()); } // It should be empty at this point, but clear it just to be safe. Fifo.Clear(); } /// /// Binds a shader program with the index in arg0. /// /// GPU state at the time of the call /// First argument of the call private void BindShaderProgram(IDeviceState state, int arg0) { int scratchOffset = ShaderIdScratchOffset + arg0 * 4; int lastId = state.Read(scratchOffset); int id = FetchParam().Word; int offset = FetchParam().Word; if (lastId == id) { FetchParam(); FetchParam(); return; } _processor.ThreedClass.SetShaderOffset(arg0, (uint)offset); // Removes overflow on the method address into the increment portion. // Present in the original macro. int addrMask = unchecked((int)0xfffc0fff) << 2; state.Write(scratchOffset & addrMask, id); state.Write((ShaderAddressScratchOffset + arg0 * 4) & addrMask, offset); int stage = FetchParam().Word; uint cbAddress = (uint)FetchParam().Word; _processor.ThreedClass.UpdateUniformBufferState(65536, cbAddress >> 24, cbAddress << 8); int stageOffset = (stage & 0x7f) << 3; state.Write((UniformBufferBindVertexOffset + stageOffset * 4) & addrMask, 17); } /// /// Updates uniform buffer state for update or bind. /// /// GPU state at the time of the call /// First argument of the call private void UpdateUniformBufferState(IDeviceState state, int arg0) { uint address = (uint)state.Read(UpdateConstantBufferAddressesBase + arg0 * 4); int size = state.Read(UpdateConstantBufferSizesBase + arg0 * 4); _processor.ThreedClass.UpdateUniformBufferState(size, address >> 24, address << 8); } /// /// Updates uniform buffer state for update. /// /// GPU state at the time of the call /// First argument of the call private void UpdateUniformBufferStateCbu(IDeviceState state, int arg0) { uint address = (uint)state.Read(UpdateConstantBufferAddressCbu); UniformBufferState ubState = new() { Address = new() { High = address >> 24, Low = address << 8 }, Size = 24320, Offset = arg0 << 2 }; _processor.ThreedClass.UpdateUniformBufferState(ubState); } /// /// Updates uniform buffer state for update. /// /// GPU state at the time of the call /// First argument of the call private void UpdateUniformBufferStateCbuV2(IDeviceState state, int arg0) { uint address = (uint)state.Read(UpdateConstantBufferAddressCbu); UniformBufferState ubState = new() { Address = new() { High = address >> 24, Low = address << 8 }, Size = 28672, Offset = arg0 << 2 }; _processor.ThreedClass.UpdateUniformBufferState(ubState); } /// /// Updates blend enable using the given argument. /// /// GPU state at the time of the call /// First argument of the call private void UpdateBlendState(IDeviceState state, int arg0) { state.Write(LogicOpOffset, 0); Array8 enable = new(); for (int i = 0; i < 8; i++) { enable[i] = new Boolean32((uint)(arg0 >> (i + 8)) & 1); } _processor.ThreedClass.UpdateBlendEnable(ref enable); } /// /// Updates color masks using the given argument and three pushed arguments. /// /// GPU state at the time of the call /// First argument of the call private void UpdateColorMasks(IDeviceState state, int arg0) { Array8 masks = new(); int index = 0; for (int i = 0; i < 4; i++) { masks[index++] = new RtColorMask((uint)arg0 & 0x1fff); masks[index++] = new RtColorMask(((uint)arg0 >> 16) & 0x1fff); if (i != 3) { arg0 = FetchParam().Word; } } _processor.ThreedClass.UpdateColorMasks(ref masks); } /// /// Clears one bound color target. /// /// GPU state at the time of the call /// First argument of the call private void ClearColor(IDeviceState state, int arg0) { int index = (arg0 >> 6) & 0xf; int layerCount = state.Read(ColorLayerCountOffset + index * ColorStructSize); _processor.ThreedClass.Clear(arg0, layerCount); } /// /// Clears the current depth-stencil target. /// /// GPU state at the time of the call /// First argument of the call private void ClearDepthStencil(IDeviceState state, int arg0) { int layerCount = state.Read(ZetaLayerCountOffset); _processor.ThreedClass.Clear(arg0, layerCount); } /// /// Performs a draw. /// /// GPU state at the time of the call /// First argument of the call private void DrawArraysInstanced(IDeviceState state, int arg0) { var topology = (PrimitiveTopology)arg0; var count = FetchParam(); var instanceCount = FetchParam(); var firstVertex = FetchParam(); var firstInstance = FetchParam(); if (ShouldSkipDraw(state, instanceCount.Word)) { return; } _processor.ThreedClass.Draw( topology, count.Word, instanceCount.Word, 0, firstVertex.Word, firstInstance.Word, indexed: false); } /// /// Performs a indexed draw. /// /// GPU state at the time of the call /// First argument of the call private void DrawElements(IDeviceState state, int arg0) { var topology = (PrimitiveTopology)arg0; var indexAddressHigh = FetchParam(); var indexAddressLow = FetchParam(); var indexType = FetchParam(); var firstIndex = 0; var indexCount = FetchParam(); _processor.ThreedClass.UpdateIndexBuffer( (uint)indexAddressHigh.Word, (uint)indexAddressLow.Word, (IndexType)indexType.Word); _processor.ThreedClass.Draw( topology, indexCount.Word, 1, firstIndex, state.Read(FirstVertexOffset), 0, indexed: true); } /// /// Performs a indexed draw. /// /// GPU state at the time of the call /// First argument of the call private void DrawElementsInstanced(IDeviceState state, int arg0) { var topology = (PrimitiveTopology)arg0; var count = FetchParam(); var instanceCount = FetchParam(); var firstIndex = FetchParam(); var firstVertex = FetchParam(); var firstInstance = FetchParam(); if (ShouldSkipDraw(state, instanceCount.Word)) { return; } _processor.ThreedClass.Draw( topology, count.Word, instanceCount.Word, firstIndex.Word, firstVertex.Word, firstInstance.Word, indexed: true); } /// /// Performs a indirect indexed draw, with parameters from a GPU buffer. /// /// GPU state at the time of the call /// First argument of the call private void DrawElementsIndirect(IDeviceState state, int arg0) { var topology = (PrimitiveTopology)arg0; var count = FetchParam(); var instanceCount = FetchParam(); var firstIndex = FetchParam(); var firstVertex = FetchParam(); var firstInstance = FetchParam(); ulong indirectBufferGpuVa = count.GpuVa; var bufferCache = _processor.MemoryManager.Physical.BufferCache; bool useBuffer = bufferCache.CheckModified(_processor.MemoryManager, indirectBufferGpuVa, IndirectIndexedDataEntrySize, out ulong indirectBufferAddress); if (useBuffer) { int indexCount = firstIndex.Word + count.Word; _processor.ThreedClass.DrawIndirect( topology, new MultiRange(indirectBufferAddress, IndirectIndexedDataEntrySize), default, 1, IndirectIndexedDataEntrySize, indexCount, IndirectDrawType.DrawIndexedIndirect); } else { if (ShouldSkipDraw(state, instanceCount.Word)) { return; } _processor.ThreedClass.Draw( topology, count.Word, instanceCount.Word, firstIndex.Word, firstVertex.Word, firstInstance.Word, indexed: true); } } /// /// Performs a indirect indexed multi-draw, with parameters from a GPU buffer. /// /// GPU state at the time of the call /// First argument of the call private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0) { int arg1 = FetchParam().Word; int arg2 = FetchParam().Word; int arg3 = FetchParam().Word; int startDraw = arg0; int endDraw = arg1; var topology = (PrimitiveTopology)arg2; int paddingWords = arg3; int stride = paddingWords * 4 + 0x14; ulong parameterBufferGpuVa = FetchParam().GpuVa; int maxDrawCount = endDraw - startDraw; if (startDraw != 0) { int drawCount = _processor.MemoryManager.Read(parameterBufferGpuVa, tracked: true); // Calculate maximum draw count based on the previous draw count and current draw count. if ((uint)drawCount <= (uint)startDraw) { // The start draw is past our total draw count, so all draws were already performed. maxDrawCount = 0; } else { // Perform just the missing number of draws. maxDrawCount = (int)Math.Min((uint)maxDrawCount, (uint)(drawCount - startDraw)); } } if (maxDrawCount == 0) { Fifo.Clear(); return; } ulong indirectBufferGpuVa = 0; int indexCount = 0; for (int i = 0; i < maxDrawCount; i++) { var count = FetchParam(); #pragma warning disable IDE0059 // Remove unnecessary value assignment var instanceCount = FetchParam(); var firstIndex = FetchParam(); var firstVertex = FetchParam(); var firstInstance = FetchParam(); #pragma warning restore IDE0059 if (i == 0) { indirectBufferGpuVa = count.GpuVa; } indexCount = Math.Max(indexCount, count.Word + firstIndex.Word); if (i != maxDrawCount - 1) { for (int j = 0; j < paddingWords; j++) { FetchParam(); } } } var bufferCache = _processor.MemoryManager.Physical.BufferCache; ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride; MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize, BufferStage.Indirect); MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4, BufferStage.Indirect); _processor.ThreedClass.DrawIndirect( topology, indirectBufferRange, parameterBufferRange, maxDrawCount, stride, indexCount, Threed.IndirectDrawType.DrawIndexedIndirectCount); } /// /// Checks if the draw should be skipped, because the masked instance count is zero. /// /// Current GPU state /// Draw instance count /// True if the draw should be skipped, false otherwise private static bool ShouldSkipDraw(IDeviceState state, int instanceCount) { return (Read(state, 0xd1b) & instanceCount) == 0; } /// /// Fetches a arguments from the arguments FIFO. /// /// The call argument, or a 0 value with null address if the FIFO is empty private FifoWord FetchParam() { if (!Fifo.TryDequeue(out var value)) { Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument."); return new FifoWord(0UL, 0); } return value; } /// /// Reads data from a GPU register. /// /// Current GPU state /// Register offset to read /// GPU register value private static int Read(IDeviceState state, int reg) { return state.Read(reg * 4); } } }