using Ryujinx.Common.Logging;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Memory.Range;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Macro High-level emulation.
/// </summary>
class MacroHLE : IMacroEE
{
// State offsets below are in bytes (the Read helper in this class converts a register index with "reg * 4").

// Offset of the layer count of color target 0; ClearColor adds index * ColorStructSize to reach other targets.
private const int ColorLayerCountOffset = 0x818;
// Distance between the state entries of two consecutive color targets.
private const int ColorStructSize = 0x40;
// Offset read by ClearDepthStencil to get the depth-stencil layer count.
private const int ZetaLayerCountOffset = 0x1230;
// Base offset that BindShaderProgram writes to (plus a per-stage offset) after updating the uniform buffer state.
private const int UniformBufferBindVertexOffset = 0x2410;
// Offset read by DrawElements and passed to the draw as its first vertex.
private const int FirstVertexOffset = 0x1434;
// Size of one indexed indirect draw entry: five 4-byte words
// (count, instance count, first index, first vertex, first instance).
private const int IndirectIndexedDataEntrySize = 0x14;
// Offset that UpdateBlendState writes 0 to before updating the blend enables.
private const int LogicOpOffset = 0x19c4;
// Base offset of the slots (indexed by arg0) where BindShaderProgram keeps the last bound shader id.
private const int ShaderIdScratchOffset = 0x3470;
// Base offset of the slots (indexed by arg0) where BindShaderProgram keeps the last bound shader offset.
private const int ShaderAddressScratchOffset = 0x3488;
// Base offset of the packed addresses that UpdateUniformBufferState indexes with arg0.
private const int UpdateConstantBufferAddressesBase = 0x34a8;
// Base offset of the sizes that UpdateUniformBufferState indexes with arg0.
private const int UpdateConstantBufferSizesBase = 0x34bc;
// Offset of the packed address read by UpdateUniformBufferStateCbu and UpdateUniformBufferStateCbuV2.
private const int UpdateConstantBufferAddressCbu = 0x3460;
// Command processor used to reach the 3D engine class and the GPU memory manager.
private readonly GPFifoProcessor _processor;
// Selects which handler Execute dispatches to.
private readonly MacroHLEFunctionName _functionName;
/// <summary>
/// Arguments FIFO.
/// </summary>
/// <remarks>
/// Holds the arguments pushed for the macro call; the handlers consume them through FetchParam.
/// </remarks>
public Queue<FifoWord> Fifo { get; }

/// <summary>
/// Creates a new instance of the HLE macro handler.
/// </summary>
/// <param name="processor">GPU GP FIFO command processor</param>
/// <param name="functionName">Name of the HLE macro function to be called</param>
public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName)
{
    _processor = processor;
    _functionName = functionName;

    // The element type must be FifoWord: FetchParam dequeues FifoWord values from this queue.
    Fifo = new Queue<FifoWord>();
}
/// <summary>
/// Executes a macro program until it exits.
/// </summary>
/// <remarks>
/// The macro code itself is not interpreted here; the handler selected at construction time
/// is invoked instead, and the arguments FIFO is emptied afterwards.
/// </remarks>
/// <param name="code">Code of the program to execute</param>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
/// <exception cref="NotImplementedException">The configured function name has no handler</exception>
public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
{
    switch (_functionName)
    {
        case MacroHLEFunctionName.BindShaderProgram:
            BindShaderProgram(state, arg0);
            break;
        case MacroHLEFunctionName.ClearColor:
            ClearColor(state, arg0);
            break;
        case MacroHLEFunctionName.ClearDepthStencil:
            ClearDepthStencil(state, arg0);
            break;
        case MacroHLEFunctionName.DrawArraysInstanced:
            DrawArraysInstanced(state, arg0);
            break;
        case MacroHLEFunctionName.DrawElements:
            DrawElements(state, arg0);
            break;
        case MacroHLEFunctionName.DrawElementsInstanced:
            DrawElementsInstanced(state, arg0);
            break;
        case MacroHLEFunctionName.DrawElementsIndirect:
            DrawElementsIndirect(state, arg0);
            break;
        case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
            MultiDrawElementsIndirectCount(state, arg0);
            break;
        case MacroHLEFunctionName.UpdateBlendState:
            UpdateBlendState(state, arg0);
            break;
        case MacroHLEFunctionName.UpdateColorMasks:
            UpdateColorMasks(state, arg0);
            break;
        case MacroHLEFunctionName.UpdateUniformBufferState:
            UpdateUniformBufferState(state, arg0);
            break;
        case MacroHLEFunctionName.UpdateUniformBufferStateCbu:
            UpdateUniformBufferStateCbu(state, arg0);
            break;
        case MacroHLEFunctionName.UpdateUniformBufferStateCbuV2:
            UpdateUniformBufferStateCbuV2(state, arg0);
            break;
        default:
            throw new NotImplementedException(_functionName.ToString());
    }

    // It should be empty at this point, but clear it just to be safe.
    Fifo.Clear();
}
/// <summary>
/// Binds a shader program with the index in arg0.
/// </summary>
/// <remarks>
/// Consumes four arguments from the FIFO (id, offset, stage, packed uniform buffer address).
/// When the id matches the one already stored in the scratch state, the remaining two
/// arguments are discarded and nothing else is done.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void BindShaderProgram(IDeviceState state, int arg0)
{
    int idSlot = ShaderIdScratchOffset + arg0 * 4;
    int previousId = state.Read(idSlot);

    int shaderId = FetchParam().Word;
    int shaderOffset = FetchParam().Word;

    if (previousId != shaderId)
    {
        _processor.ThreedClass.SetShaderOffset(arg0, (uint)shaderOffset);

        // Removes overflow on the method address into the increment portion.
        // Present in the original macro.
        int addrMask = unchecked((int)0xfffc0fff) << 2;
        int addressSlot = ShaderAddressScratchOffset + arg0 * 4;

        state.Write(idSlot & addrMask, shaderId);
        state.Write(addressSlot & addrMask, shaderOffset);

        int stage = FetchParam().Word;
        uint packedCbAddress = (uint)FetchParam().Word;

        // The packed address is split into its high and low words.
        _processor.ThreedClass.UpdateUniformBufferState(0x10000, packedCbAddress >> 24, packedCbAddress << 8);

        int stageOffset = (stage & 0x7f) << 3;
        int bindOffset = UniformBufferBindVertexOffset + stageOffset * 4;

        state.Write(bindOffset & addrMask, 0x11);
    }
    else
    {
        // Same shader as last time: drop the stage and uniform buffer address arguments.
        FetchParam();
        FetchParam();
    }
}
/// <summary>
/// Updates uniform buffer state for update or bind.
/// </summary>
/// <remarks>
/// The packed address and the size are read from the state arrays at the slot selected by arg0.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferState(IDeviceState state, int arg0)
{
    int slotOffset = arg0 * 4;

    uint packedAddress = (uint)state.Read(UpdateConstantBufferAddressesBase + slotOffset);
    int bufferSize = state.Read(UpdateConstantBufferSizesBase + slotOffset);

    uint addressHigh = packedAddress >> 24;
    uint addressLow = packedAddress << 8;

    _processor.ThreedClass.UpdateUniformBufferState(bufferSize, addressHigh, addressLow);
}
/// <summary>
/// Updates uniform buffer state for update.
/// </summary>
/// <remarks>
/// Uses a fixed buffer size of 24320 bytes and an offset of arg0 words (arg0 * 4).
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferStateCbu(IDeviceState state, int arg0)
{
    const int BufferSize = 24320;

    uint packedAddress = (uint)state.Read(UpdateConstantBufferAddressCbu);

    UniformBufferState newState = new()
    {
        Address = new()
        {
            High = packedAddress >> 24,
            Low = packedAddress << 8
        },
        Size = BufferSize,
        Offset = arg0 << 2
    };

    _processor.ThreedClass.UpdateUniformBufferState(newState);
}
/// <summary>
/// Updates uniform buffer state for update.
/// </summary>
/// <remarks>
/// Uses a fixed buffer size of 28672 bytes and an offset of arg0 words (arg0 * 4).
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferStateCbuV2(IDeviceState state, int arg0)
{
    const int BufferSize = 28672;

    uint packedAddress = (uint)state.Read(UpdateConstantBufferAddressCbu);

    UniformBufferState newState = new()
    {
        Address = new()
        {
            High = packedAddress >> 24,
            Low = packedAddress << 8
        },
        Size = BufferSize,
        Offset = arg0 << 2
    };

    _processor.ThreedClass.UpdateUniformBufferState(newState);
}
/// <summary>
/// Updates blend enable using the given argument.
/// </summary>
/// <remarks>
/// Writes 0 to the logic op state first, then takes the enable bit of render target i
/// from bit (i + 8) of arg0.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateBlendState(IDeviceState state, int arg0)
{
    state.Write(LogicOpOffset, 0);

    Array8<Boolean32> enable = new();

    for (int i = 0; i < 8; i++)
    {
        enable[i] = new Boolean32((uint)(arg0 >> (i + 8)) & 1);
    }

    _processor.ThreedClass.UpdateBlendEnable(ref enable);
}
/// <summary>
/// Updates color masks using the given argument and three pushed arguments.
/// </summary>
/// <remarks>
/// Each of the four words (arg0 followed by three FIFO arguments) carries two masks:
/// one in bits 0-12 and one in bits 16-28.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateColorMasks(IDeviceState state, int arg0)
{
    Array8<RtColorMask> masks = new();

    uint packed = (uint)arg0;

    for (int pair = 0; pair < 4; pair++)
    {
        if (pair != 0)
        {
            // The first pair comes from arg0, the remaining three from the FIFO.
            packed = (uint)FetchParam().Word;
        }

        masks[pair * 2] = new RtColorMask(packed & 0x1fff);
        masks[pair * 2 + 1] = new RtColorMask((packed >> 16) & 0x1fff);
    }

    _processor.ThreedClass.UpdateColorMasks(ref masks);
}
/// <summary>
/// Clears one bound color target.
/// </summary>
/// <remarks>
/// The target index is taken from bits 6-9 of arg0 and used to look up that target's layer count.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void ClearColor(IDeviceState state, int arg0)
{
    int targetIndex = (arg0 >> 6) & 0xf;
    int layerCountOffset = ColorLayerCountOffset + targetIndex * ColorStructSize;
    int layers = state.Read(layerCountOffset);

    _processor.ThreedClass.Clear(arg0, layers);
}
/// <summary>
/// Clears the current depth-stencil target.
/// </summary>
/// <remarks>
/// The layer count passed to the clear is read from the depth-stencil layer count state.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void ClearDepthStencil(IDeviceState state, int arg0)
{
    int layers = state.Read(ZetaLayerCountOffset);

    _processor.ThreedClass.Clear(arg0, layers);
}
/// <summary>
/// Performs a draw.
/// </summary>
/// <remarks>
/// Consumes four FIFO arguments: vertex count, instance count, first vertex and first instance.
/// Nothing is drawn when the masked instance count is zero.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void DrawArraysInstanced(IDeviceState state, int arg0)
{
    PrimitiveTopology topology = (PrimitiveTopology)arg0;

    int vertexCount = FetchParam().Word;
    int instances = FetchParam().Word;
    int firstVertex = FetchParam().Word;
    int firstInstance = FetchParam().Word;

    if (!ShouldSkipDraw(state, instances))
    {
        // Non-indexed draw, so the first index is always 0.
        _processor.ThreedClass.Draw(topology, vertexCount, instances, 0, firstVertex, firstInstance, indexed: false);
    }
}
/// <summary>
/// Performs an indexed draw.
/// </summary>
/// <remarks>
/// Consumes four FIFO arguments: index buffer address high, address low, index type and index count.
/// The index buffer is updated first, then a single instance is drawn starting at index 0.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void DrawElements(IDeviceState state, int arg0)
{
    PrimitiveTopology topology = (PrimitiveTopology)arg0;

    uint addressHigh = (uint)FetchParam().Word;
    uint addressLow = (uint)FetchParam().Word;
    IndexType type = (IndexType)FetchParam().Word;
    int indexCount = FetchParam().Word;

    _processor.ThreedClass.UpdateIndexBuffer(addressHigh, addressLow, type);

    // Read after the index buffer update, matching the original evaluation order.
    int firstVertex = state.Read(FirstVertexOffset);

    _processor.ThreedClass.Draw(topology, indexCount, 1, 0, firstVertex, 0, indexed: true);
}
/// <summary>
/// Performs an indexed, instanced draw.
/// </summary>
/// <remarks>
/// Consumes five FIFO arguments: index count, instance count, first index, first vertex
/// and first instance. Nothing is drawn when the masked instance count is zero.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void DrawElementsInstanced(IDeviceState state, int arg0)
{
    PrimitiveTopology topology = (PrimitiveTopology)arg0;

    int indexCount = FetchParam().Word;
    int instances = FetchParam().Word;
    int firstIndex = FetchParam().Word;
    int firstVertex = FetchParam().Word;
    int firstInstance = FetchParam().Word;

    if (!ShouldSkipDraw(state, instances))
    {
        _processor.ThreedClass.Draw(topology, indexCount, instances, firstIndex, firstVertex, firstInstance, indexed: true);
    }
}
/// <summary>
/// Performs an indirect indexed draw, with parameters from a GPU buffer.
/// </summary>
/// <remarks>
/// Five FIFO arguments are consumed. When the buffer cache reports the memory behind the
/// first argument as modified, the draw parameters are sourced from that buffer; otherwise
/// the fetched argument values are used for a regular indexed draw.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void DrawElementsIndirect(IDeviceState state, int arg0)
{
    PrimitiveTopology topology = (PrimitiveTopology)arg0;

    FifoWord countWord = FetchParam();
    FifoWord instanceCountWord = FetchParam();
    FifoWord firstIndexWord = FetchParam();
    FifoWord firstVertexWord = FetchParam();
    FifoWord firstInstanceWord = FetchParam();

    // The GPU address of the first argument is the start of the indirect data entry.
    ulong entryGpuVa = countWord.GpuVa;

    bool modifiedOnGpu = _processor.MemoryManager.Physical.BufferCache.CheckModified(
        _processor.MemoryManager,
        entryGpuVa,
        IndirectIndexedDataEntrySize,
        out ulong entryAddress);

    if (!modifiedOnGpu)
    {
        if (ShouldSkipDraw(state, instanceCountWord.Word))
        {
            return;
        }

        _processor.ThreedClass.Draw(
            topology,
            countWord.Word,
            instanceCountWord.Word,
            firstIndexWord.Word,
            firstVertexWord.Word,
            firstInstanceWord.Word,
            indexed: true);

        return;
    }

    int indexCount = firstIndexWord.Word + countWord.Word;

    _processor.ThreedClass.DrawIndirect(
        topology,
        new MultiRange(entryAddress, IndirectIndexedDataEntrySize),
        default,
        1,
        IndirectIndexedDataEntrySize,
        indexCount,
        IndirectDrawType.DrawIndexedIndirect);
}
/// <summary>
/// Performs an indirect indexed multi-draw, with parameters from a GPU buffer.
/// </summary>
/// <remarks>
/// FIFO layout consumed here: end draw, topology, padding word count, parameter buffer word,
/// then one five-word entry per draw (count, instance count, first index, first vertex,
/// first instance), with the padding words between consecutive entries.
/// </remarks>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0)
{
    int arg1 = FetchParam().Word;
    int arg2 = FetchParam().Word;
    int arg3 = FetchParam().Word;

    int startDraw = arg0;
    int endDraw = arg1;
    var topology = (PrimitiveTopology)arg2;
    int paddingWords = arg3;
    int stride = paddingWords * 4 + 0x14;

    ulong parameterBufferGpuVa = FetchParam().GpuVa;

    int maxDrawCount = endDraw - startDraw;

    if (startDraw != 0)
    {
        int drawCount = _processor.MemoryManager.Read<int>(parameterBufferGpuVa, tracked: true);

        // Calculate maximum draw count based on the previous draw count and current draw count.
        if ((uint)drawCount <= (uint)startDraw)
        {
            // The start draw is past our total draw count, so all draws were already performed.
            maxDrawCount = 0;
        }
        else
        {
            // Perform just the missing number of draws.
            maxDrawCount = (int)Math.Min((uint)maxDrawCount, (uint)(drawCount - startDraw));
        }
    }

    // A non-positive count means there is nothing to draw. Negative values (end draw before
    // start draw) must be rejected too: they would otherwise turn into a huge unsigned
    // buffer size and a negative draw count below.
    if (maxDrawCount <= 0)
    {
        Fifo.Clear();
        return;
    }

    ulong indirectBufferGpuVa = 0;
    int indexCount = 0;

    for (int i = 0; i < maxDrawCount; i++)
    {
        FifoWord count = FetchParam();
        FetchParam(); // Instance count, not needed here.
        FifoWord firstIndex = FetchParam();
        FetchParam(); // First vertex, not needed here.
        FetchParam(); // First instance, not needed here.

        if (i == 0)
        {
            // The first entry marks the start of the indirect buffer.
            indirectBufferGpuVa = count.GpuVa;
        }

        indexCount = Math.Max(indexCount, count.Word + firstIndex.Word);

        // Padding only separates entries, so there is none to skip after the last one.
        if (i != maxDrawCount - 1)
        {
            for (int j = 0; j < paddingWords; j++)
            {
                FetchParam();
            }
        }
    }

    var bufferCache = _processor.MemoryManager.Physical.BufferCache;

    ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride;

    MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize, BufferStage.Indirect);
    MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4, BufferStage.Indirect);

    _processor.ThreedClass.DrawIndirect(
        topology,
        indirectBufferRange,
        parameterBufferRange,
        maxDrawCount,
        stride,
        indexCount,
        IndirectDrawType.DrawIndexedIndirectCount);
}
/// <summary>
/// Checks if the draw should be skipped, because the masked instance count is zero.
/// </summary>
/// <param name="state">Current GPU state</param>
/// <param name="instanceCount">Draw instance count</param>
/// <returns>True if the draw should be skipped, false otherwise</returns>
private static bool ShouldSkipDraw(IDeviceState state, int instanceCount)
{
    // Register whose value is ANDed with the instance count.
    const int MaskRegister = 0xd1b;

    int mask = Read(state, MaskRegister);

    return (mask & instanceCount) == 0;
}
/// <summary>
/// Fetches an argument from the arguments FIFO.
/// </summary>
/// <returns>The call argument, or a 0 value with null address if the FIFO is empty</returns>
private FifoWord FetchParam()
{
    if (Fifo.TryDequeue(out FifoWord word))
    {
        return word;
    }

    // Running out of arguments is logged and tolerated rather than treated as fatal.
    Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");

    return new FifoWord(0UL, 0);
}
/// <summary>
/// Reads data from a GPU register.
/// </summary>
/// <param name="state">Current GPU state</param>
/// <param name="reg">Register offset to read</param>
/// <returns>GPU register value</returns>
private static int Read(IDeviceState state, int reg) => state.Read(reg * 4); // Register index to byte offset.
}
}